Example #1
 def _setup_event_mail(self, itask, event):
     """Set up task event notification, by email."""
     if event in self.NON_UNIQUE_EVENTS:
         key1 = (self.HANDLER_MAIL,
                 '%s-%d' % (event, itask.non_unique_events.get(event, 1)))
     else:
         key1 = (self.HANDLER_MAIL, event)
     id_key = (key1, str(itask.point), itask.tdef.name, itask.submit_num)
     if (id_key in self.event_timers or event not in self._get_events_conf(
             itask, "mail events", [])):
         return
     retry_delays = self._get_events_conf(itask, "mail retry delays")
     if not retry_delays:
         retry_delays = [0]
     self.event_timers[id_key] = TaskActionTimer(
         TaskEventMailContext(
             self.HANDLER_MAIL,  # key
             self.HANDLER_MAIL,  # ctx_type
             self._get_events_conf(  # mail_from
                 itask,
                 "mail from",
                 "notifications@" + get_host(),
             ),
             self._get_events_conf(itask, "mail to", get_user()),  # mail_to
             self._get_events_conf(itask, "mail smtp"),  # mail_smtp
         ),
         retry_delays)
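
For orientation, a sketch of the timer key built above, with invented values; it assumes HANDLER_MAIL is the event-mail handler key string. For a non-unique event such as 'warning' the occurrence count is appended, so repeated events get separate timers.

# Illustration only - every value below is invented.
key1 = ('event-mail', 'warning-2')                 # second 'warning' event
id_key = (key1, '20200101T0000Z', 'my_task', 3)    # (key1, point, name, submit_num)
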
Example #2
    def test_remove_bad_hosts(self):
        """Test the '_remove_bad_hosts' method.
        Test using 'localhost' only, since remote host functionality is
        contained inside remote_cylc_cmd() and is therefore outside the scope
        of HostAppointer.
        """
        self.mock_global_config(set_hosts=['localhost'])
        self.assertTrue(self.app._remove_bad_hosts().get('localhost', False))
        # Test 'localhost' true identifier is treated properly too.
        self.mock_global_config(set_hosts=[get_host()])
        self.assertTrue(self.app._remove_bad_hosts().get('localhost', False))

        self.mock_global_config(set_hosts=['localhost', 'FAKE_HOST'])
        # Check that no exceptions are raised, and that 'localhost' data is
        # returned but 'FAKE_HOST' data is not.
        # Difficult to unittest for a specific stderr string; this is enough.
        self.assertTrue(self.app._remove_bad_hosts().get('localhost', False))
        self.assertFalse(self.app._remove_bad_hosts().get('FAKE_HOST', False))

        # Apply thresholds impossible to pass; check results in host removal.
        self.mock_global_config(
            set_hosts=['localhost'], set_thresholds='load:15 0.0')
        self.assertEqual(self.app._remove_bad_hosts(), {})
        self.mock_global_config(
            set_hosts=['localhost'], set_thresholds='memory 1000000000')
        self.assertEqual(self.app._remove_bad_hosts(), {})
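
The assertions above rely on the appointer reporting the current machine under the key 'localhost' even when it is configured by its real name. A minimal sketch of that assumed normalisation, built on is_remote_host from cylc.flow.hostuserutil:

from cylc.flow.hostuserutil import get_host, is_remote_host

def normalise_host(name):
    """Collapse any identifier of the local host to 'localhost' (sketch only)."""
    return name if is_remote_host(name) else 'localhost'

# get_host() names the machine we are running on, so it is never remote:
assert normalise_host(get_host()) == 'localhost'
assert normalise_host('localhost') == 'localhost'
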
Example #3
def _is_local_auth_ok(reg, owner, host):
    """Return True if it is OK to use local passphrase file.

    Use values in ~/cylc-run/REG/.service/contact to make a judgement.
    """
    if is_remote(host, owner):
        fname = os.path.join(get_suite_srv_dir(reg),
                             SuiteFiles.Service.CONTACT)
        data = {}
        try:
            for line in open(fname):
                key, value = ([item.strip() for item in line.split("=", 1)])
                data[key] = value
        except (IOError, ValueError):
            # No contact file
            return False
        else:
            # Contact file exists, check values match
            if owner is None:
                owner = get_user()
            if host is None:
                host = get_host()
            host_value = data.get(ContactFileFields.HOST, "")
            return (reg == data.get(ContactFileFields.NAME)
                    and owner == data.get(ContactFileFields.OWNER)
                    and (host == host_value
                         or host == host_value.split(".", 1)[0]  # no domain
                         ))
    else:
        return True
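
To make the parsing loop concrete, here is a hypothetical contact file and the dict it yields; the key names are assumed to mirror ContactFileFields.NAME/OWNER/HOST and every value is invented.

contact_text = (
    "CYLC_SUITE_NAME=my.suite\n"
    "CYLC_SUITE_OWNER=alice\n"
    "CYLC_SUITE_HOST=node01.example.com\n"
)
data = {}
for line in contact_text.splitlines():
    key, value = [item.strip() for item in line.split("=", 1)]
    data[key] = value
# data['CYLC_SUITE_HOST'] == 'node01.example.com', etc.
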
Example #4
def test_rose_fileinstall_rose_suite_cylc_install_conf(fixture_install_flow):
    _, _, _, result, destpath = fixture_install_flow
    host = get_host()
    assert (destpath / 'opt/rose-suite-cylc-install.conf').read_text() == (
        "# This file records CLI Options.\n\n"
        "!opts=A B\n\n"
        "[env]\n"
        "FOO=42\n"
        f"#{ROSE_ORIG_HOST_INSTALLED_OVERRIDE_STRING}\n"
        f"ROSE_ORIG_HOST={host}\n\n"
        "[jinja2:suite.rc]\n"
        "BAR=84\n"
        "CORNETTO=120\n"
        "FLAKE=99\n"
        f"#{ROSE_ORIG_HOST_INSTALLED_OVERRIDE_STRING}\n"
        f"ROSE_ORIG_HOST={host}\n")
Example #5
 def _run_event_mail(self, config, ctx):
     """Helper for "run_event_handlers", do mail notification."""
     if ctx.event in self.get_events_conf(config, 'mail events', []):
         # SMTP server
         env = dict(os.environ)
         mail_smtp = self.get_events_conf(config, 'mail smtp')
         if mail_smtp:
             env['smtp'] = mail_smtp
         subject = '[suite %(event)s] %(suite)s' % {
             'suite': ctx.suite, 'event': ctx.event}
         stdin_str = ''
         for name, value in [
                 ('suite event', ctx.event),
                 ('reason', ctx.reason),
                 ('suite', ctx.suite),
                 ('host', ctx.host),
                 ('port', ctx.port),
                 ('owner', ctx.owner)]:
             if value:
                 stdin_str += '%s: %s\n' % (name, value)
         mail_footer_tmpl = self.get_events_conf(config, 'mail footer')
         if mail_footer_tmpl:
             stdin_str += (mail_footer_tmpl + '\n') % {
                 'host': ctx.host,
                 'port': ctx.port,
                 'owner': ctx.owner,
                 'suite': ctx.suite}
         proc_ctx = SubProcContext(
             (self.SUITE_EVENT_HANDLER, ctx.event),
             [
                 'mail',
                 '-s', subject,
                 '-r', self.get_events_conf(
                     config,
                     'mail from', 'notifications@' + get_host()),
                 self.get_events_conf(config, 'mail to', get_user()),
             ],
             env=env,
             stdin_str=stdin_str)
         if self.proc_pool.closed:
             # Run command in foreground if process pool is closed
             self.proc_pool.run_command(proc_ctx)
             self._run_event_handlers_callback(proc_ctx)
         else:
             # Run command using process pool otherwise
             self.proc_pool.put_command(
                 proc_ctx, self._run_event_mail_callback)
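
The SubProcContext built above invokes the standard mail command; roughly, and with an invented suite and event name, the argv is equivalent to the following, with stdin_str piped to the command's stdin:

from cylc.flow.hostuserutil import get_host, get_user

argv = [
    'mail',
    '-s', '[suite shutdown] my.suite',      # subject (values invented)
    '-r', 'notifications@' + get_host(),    # default "mail from"
    get_user(),                             # default "mail to"
]
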
Example #7
 def _send_mail(self, event, subject, message, schd, env):
     proc_ctx = SubProcContext(
         (self.WORKFLOW_EVENT_HANDLER, event),
         [
             'mail',
             '-s', subject,
             '-r', self.get_events_conf(
                 schd.config,
                 'from', 'notifications@' + get_host()),
             self.get_events_conf(schd.config, 'to', get_user()),
         ],
         env=env,
         stdin_str=message)
     if self.proc_pool.closed:
         # Run command in foreground if process pool is closed
         self.proc_pool.run_command(proc_ctx)
         self._run_event_handlers_callback(proc_ctx)
     else:
         # Run command using process pool otherwise
         self.proc_pool.put_command(
             proc_ctx, callback=self._run_event_mail_callback)
Example #8
    def cache_passphrase(self, reg, owner, host, value):
        """Cache and dump passphrase for a remote suite in standard location.

        Save passphrase to ~/.cylc/auth/owner@host/reg if possible.
        This is normally called on a successful authentication, and will cache
        the remote passphrase in memory as well.
        """
        if owner is None:
            owner = get_user()
        if host is None:
            host = get_host()
        path = self._get_cache_dir(reg, owner, host)
        self.cache[self.FILE_BASE_PASSPHRASE][(reg, owner, host)] = value
        # Dump to a file only for remote suites loaded via SSH.
        if self.can_disk_cache_passphrases.get((reg, owner, host)):
            # Although not desirable, failing to dump the passphrase to a file
            # is not disastrous.
            try:
                self._dump_item(path, self.FILE_BASE_PASSPHRASE, value)
            except (IOError, OSError):
                if cylc.flow.flags.debug:
                    import traceback
                    traceback.print_exc()
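
A rough sketch of the cache location described in the docstring (~/.cylc/auth/owner@host/reg); example_cache_dir is a hypothetical stand-in for _get_cache_dir and the example arguments are invented.

import os

from cylc.flow.hostuserutil import get_host, get_user

def example_cache_dir(reg, owner=None, host=None):
    """Hypothetical equivalent of _get_cache_dir, for illustration only."""
    owner = owner or get_user()
    host = host or get_host()
    return os.path.join(
        os.path.expanduser('~'), '.cylc', 'auth', '%s@%s' % (owner, host), reg)

# example_cache_dir('my.suite', 'alice', 'node01') ->
#     '<home>/.cylc/auth/alice@node01/my.suite'
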
Example #10
    def _is_local_auth_ok(self, reg, owner, host):
        """Return True if it is OK to use local passphrase file.

        Use values in ~/cylc-run/REG/.service/contact to make a judgement.
        Cache results in self.can_use_load_auths.
        """
        if (reg, owner, host) not in self.can_use_load_auths:
            if is_remote(host, owner):
                fname = os.path.join(
                    self.get_suite_srv_dir(reg), self.FILE_BASE_CONTACT)
                data = {}
                try:
                    for line in open(fname):
                        key, value = (
                            [item.strip() for item in line.split("=", 1)])
                        data[key] = value
                except (IOError, ValueError):
                    # No contact file
                    self.can_use_load_auths[(reg, owner, host)] = False
                else:
                    # Contact file exists, check values match
                    if owner is None:
                        owner = get_user()
                    if host is None:
                        host = get_host()
                    host_value = data.get(self.KEY_HOST, "")
                    self.can_use_load_auths[(reg, owner, host)] = (
                        reg == data.get(self.KEY_NAME) and
                        owner == data.get(self.KEY_OWNER) and
                        (
                            host == host_value or
                            host == host_value.split(".", 1)[0]  # no domain
                        )
                    )
            else:
                self.can_use_load_auths[(reg, owner, host)] = True
        return self.can_use_load_auths[(reg, owner, host)]
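
The final comparison accepts either the fully qualified host name recorded in the contact file or its short form; a small worked illustration with an invented name:

host_value = 'node01.example.com'               # as read from the contact file
assert 'node01.example.com' == host_value       # full name matches
assert 'node01' == host_value.split('.', 1)[0]  # short name (no domain) also matches
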
Example #12
    def get_auth_item(self, item, reg, owner=None, host=None, content=False):
        """Locate/load passphrase, SSL private key, SSL certificate, etc.

        Return file name, or content of file if content=True is set.
        Files are searched from these locations in order:

        1/ For running task jobs, service directory under:
           a/ $CYLC_SUITE_RUN_DIR for remote jobs.
           b/ $CYLC_SUITE_RUN_DIR_ON_SUITE_HOST for local jobs or remote jobs
              with SSH messaging.

        2/ (Passphrases only) From memory cache, for remote suite passphrases.
           Don't use if content=False.

        3/ For suite on local user@host. The suite service directory.

        4/ Location under $HOME/.cylc/ for remote suite control from accounts
           that do not actually need the suite definition directory to be
           installed:
           $HOME/.cylc/auth/SUITE_OWNER@SUITE_HOST/SUITE_NAME/

        5/ For remote suites, try locating the file from the suite service
           directory on remote owner@host via SSH. If content=False, the value
           of the located file will be dumped under:
           $HOME/.cylc/auth/SUITE_OWNER@SUITE_HOST/SUITE_NAME/

        """
        if item not in [
                self.FILE_BASE_PASSPHRASE, self.FILE_BASE_CONTACT,
                self.FILE_BASE_CONTACT2
        ]:
            raise ValueError("%s: item not recognised" % item)
        if item == self.FILE_BASE_PASSPHRASE:
            self.can_disk_cache_passphrases[(reg, owner, host)] = False

        if reg == os.getenv('CYLC_SUITE_NAME'):
            env_keys = []
            if 'CYLC_SUITE_RUN_DIR' in os.environ:
                # 1(a)/ Task messaging call.
                env_keys.append('CYLC_SUITE_RUN_DIR')
            elif self.KEY_SUITE_RUN_DIR_ON_SUITE_HOST in os.environ:
                # 1(b)/ Task messaging call via ssh messaging.
                env_keys.append(self.KEY_SUITE_RUN_DIR_ON_SUITE_HOST)
            for key in env_keys:
                path = os.path.join(os.environ[key], self.DIR_BASE_SRV)
                if content:
                    value = self._load_local_item(item, path)
                else:
                    value = self._locate_item(item, path)
                if value:
                    return value
        # 2/ From memory cache
        if item in self.cache:
            my_owner = owner
            my_host = host
            if my_owner is None:
                my_owner = get_user()
            if my_host is None:
                my_host = get_host()
            try:
                return self.cache[item][(reg, my_owner, my_host)]
            except KeyError:
                pass
        # 3/ Local suite service directory
        if self._is_local_auth_ok(reg, owner, host):
            path = self.get_suite_srv_dir(reg)
            if content:
                value = self._load_local_item(item, path)
            else:
                value = self._locate_item(item, path)
            if value:
                return value
        # 4/ Disk cache for remote suites
        if owner is not None and host is not None:
            paths = [self._get_cache_dir(reg, owner, host)]
            short_host = host.split('.', 1)[0]
            if short_host != host:
                paths.append(self._get_cache_dir(reg, owner, short_host))
            for path in paths:
                if content:
                    value = self._load_local_item(item, path)
                else:
                    value = self._locate_item(item, path)
                if value:
                    return value

        # 5/ Use SSH to load content from remote owner@host
        # Note: It is not possible to find ".service/contact2" on the suite
        # host, because it is installed on task host by "cylc remote-init" on
        # demand.
        if item != self.FILE_BASE_CONTACT2:
            value = self._load_remote_item(item, reg, owner, host)
            if value:
                if item == self.FILE_BASE_PASSPHRASE:
                    self.can_disk_cache_passphrases[(reg, owner, host)] = True
                if not content:
                    path = self._get_cache_dir(reg, owner, host)
                    self._dump_item(path, item, value)
                    value = os.path.join(path, item)
                return value

        raise SuiteServiceFileError("Couldn't get %s" % item)
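
A hedged usage sketch of the lookup described above: fetching a remote suite's passphrase content. Here srv_files_mgr stands in for an instance of this class, and the suite, owner and host names are invented.

passphrase = srv_files_mgr.get_auth_item(
    srv_files_mgr.FILE_BASE_PASSPHRASE,   # item
    'my.suite',                           # reg
    owner='alice',
    host='node01.example.com',
    content=True,                         # return the content, not a file path
)
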
Example #13

import os
import pytest
import re
import shutil
import subprocess

from pathlib import Path
from uuid import uuid4

from cylc.flow.hostuserutil import get_host
from cylc.flow.pathutil import get_workflow_run_dir

HOST = get_host()


@pytest.fixture(scope='module')
def monkeymodule():
    from _pytest.monkeypatch import MonkeyPatch
    mpatch = MonkeyPatch()
    yield mpatch
    mpatch.undo()


@pytest.fixture(scope='module')
def fixture_provide_flow(tmp_path_factory):
    """Provide a cylc workflow based on the contents of a folder which can
    be either validated or installed.
    """
Example #14
def test_is_remote_host_on_localhost():
    """is_remote_host with localhost."""
    assert not is_remote_host(None)
    assert not is_remote_host('localhost')
    assert not is_remote_host(os.getenv('HOSTNAME'))
    assert not is_remote_host(get_host())
Example #15
 def _run_event_mail(self, config, ctx):
     """Helper for "run_event_handlers", do mail notification."""
     if ctx.event in self.get_events_conf(config, 'mail events', []):
         # SMTP server
         env = dict(os.environ)
         mail_smtp = self.get_events_conf(config, 'smtp')
         if mail_smtp:
             env['smtp'] = mail_smtp
         subject = '[workflow %(event)s] %(workflow)s' % {
             'workflow': ctx.workflow,
             'event': ctx.event
         }
         stdin_str = ''
         for name, value in [('workflow event', ctx.event),
                             ('reason', ctx.reason),
                             ('workflow', ctx.workflow), ('host', ctx.host),
                             ('port', ctx.port), ('owner', ctx.owner)]:
             if value:
                 stdin_str += '%s: %s\n' % (name, value)
         mail_footer_tmpl = self.get_events_conf(config, 'footer')
         if mail_footer_tmpl:
             # BACK COMPAT: "suite" deprecated
             # url:
             #     https://github.com/cylc/cylc-flow/pull/4174
             # from:
             #     Cylc 8
             # remove at:
             #     Cylc 9
             try:
                 stdin_str_footer = (mail_footer_tmpl + '\n') % {
                     'host': ctx.host,
                     'port': ctx.port,
                     'owner': ctx.owner,
                     'suite': ctx.workflow,  # deprecated
                     'workflow': ctx.workflow
                 }
             except KeyError:
                 LOG.warning("Ignoring bad mail footer template: %s" %
                             (mail_footer_tmpl))
             else:
                 stdin_str += stdin_str_footer
         proc_ctx = SubProcContext(
             (self.WORKFLOW_EVENT_HANDLER, ctx.event), [
                 'mail',
                 '-s',
                 subject,
                 '-r',
                 self.get_events_conf(config, 'from',
                                      'notifications@' + get_host()),
                 self.get_events_conf(config, 'to', get_user()),
             ],
             env=env,
             stdin_str=stdin_str)
         if self.proc_pool.closed:
             # Run command in foreground if process pool is closed
             self.proc_pool.run_command(proc_ctx)
             self._run_event_handlers_callback(proc_ctx)
         else:
             # Run command using process pool otherwise
             self.proc_pool.put_command(
                 proc_ctx, callback=self._run_event_mail_callback)
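
To illustrate the footer handling, here is a hypothetical footer template using the named fields substituted above; a field name missing from the mapping raises KeyError, which is what triggers the "Ignoring bad mail footer template" warning.

mail_footer_tmpl = 'See http://node01.example.com/#/workflow/%(workflow)s'  # invented
stdin_str_footer = (mail_footer_tmpl + '\n') % {
    'host': 'node01.example.com',
    'port': 43001,
    'owner': 'alice',
    'suite': 'demo',      # deprecated alias, kept for back compat
    'workflow': 'demo',
}
# stdin_str_footer == 'See http://node01.example.com/#/workflow/demo\n'
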
Example #16
def get_rose_vars_from_config_node(config, config_node, environ):
    """Load template variables from a Rose config node.

    This uses only the provided config node and environment variables
    - there is no system interaction.

    Args:
        config (dict):
            Object which will be populated with the results.
        config_node (metomi.rose.config.ConfigNode):
            Configuration node representing the Rose suite configuration.
        environ (dict):
            Dictionary of environment variables

    """
    templating = None
    sections = {'jinja2:suite.rc', 'empy:suite.rc', 'template variables'}

    # Don't allow multiple templating sections.
    defined_sections = sections.intersection(set(config_node.value))
    if len(defined_sections) > 1:
        raise MultipleTemplatingEnginesError(
            "You should not define more than one templating section. "
            f"You defined:\n\t{'; '.join(defined_sections)}")
    elif len(defined_sections) == 1:
        templating, = defined_sections
        if templating != 'template variables':
            config['templating_detected'] = templating.replace(':suite.rc', '')
        else:
            config['templating_detected'] = templating

    # Create env section if it doesn't already exist.
    if 'env' not in config_node.value:
        config_node.set(['env'])

    # Get Values for standard ROSE variables (ROSE_ORIG_HOST and ROSE_SITE).
    rose_orig_host = get_host()
    rose_site = ResourceLocator().get_conf().get_value(['site'], '')

    # For each section process variables and add standard variables.
    for section in ['env', templating]:
        if section not in config_node.value:
            continue

        # Add standard ROSE_VARIABLES
        config_node[section].set(['ROSE_SITE'], rose_site)
        config_node[section].set(['ROSE_VERSION'], ROSE_VERSION)
        config_node[section].set(['ROSE_ORIG_HOST'], rose_orig_host)

        # Use env_var_process to process variables which may need expanding.
        for key, node in config_node.value[section].value.items():
            try:
                config_node.value[section].value[key].value = env_var_process(
                    node.value, environ=environ)
                if section == 'env':
                    environ[key] = node.value
            except UnboundEnvironmentVariableError as exc:
                raise ConfigProcessError(['env', key], node.value, exc)

    # For each of the template language sections extract items to a simple
    # dict to be returned.
    if 'env' in config_node.value:
        config['env'] = {
            item[0][1]: item[1].value
            for item in config_node.value['env'].walk()
        }
    if templating in config_node.value:
        config['template_variables'] = {
            item[0][1]: item[1].value
            for item in config_node.value[templating].walk()
        }
    elif 'template variables' in config_node.value:
        config['template_variables'] = {
            item[0][1]: item[1].value
            for item in config_node.value['template variables'].walk()
        }

    # Add the entire config to ROSE_SUITE_VARIABLES to allow for programmatic
    # access.
    if templating is not None:
        parser = Parser()
        for key, value in config['template_variables'].items():
            # The special variables are already Python variables.
            if key not in ['ROSE_ORIG_HOST', 'ROSE_VERSION', 'ROSE_SITE']:
                try:
                    config['template_variables'][key] = (
                        parser.literal_eval(value))
                except Exception:
                    raise ConfigProcessError(
                        [templating, key], value,
                        f'Invalid template variable: {value}'
                        '\nMust be a valid Python or Jinja2 literal'
                        ' (note strings "must be quoted").') from None

    # Add ROSE_SUITE_VARIABLES to config of templating engines in use.
    if templating is not None:
        config['template_variables']['ROSE_SUITE_VARIABLES'] = config[
            'template_variables']
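
A hedged usage sketch, assuming the metomi.rose ConfigNode API used above; the template variable name and value are invented.

import os

from metomi.rose.config import ConfigNode

config = {}
node = ConfigNode()
node.set(['jinja2:suite.rc', 'ANSWER'], '42')
get_rose_vars_from_config_node(config, node, dict(os.environ))
# config['templating_detected'] == 'jinja2'
# config['template_variables']['ANSWER'] == 42 (literal-evaluated)
# ROSE_ORIG_HOST, ROSE_VERSION and ROSE_SITE are injected automatically.
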
Example #17
def record_cylc_install_options(
    rundir=None,
    opts=None,
    srcdir=None,
):
    """Create/modify files recording Cylc install config options.

    Creates a new config based on CLI options and writes it to the workflow
    install location as ``rose-suite-cylc-install.conf``.

    If ``rose-suite-cylc-install.conf`` already exists over-writes changed
    items, except for ``!opts=`` which is merged and simplified.

    If ``!opts=`` have been changed these are appended to those that have
    been written in the installed ``rose-suite.conf``.

    Args:
        srcdir (pathlib.Path):
            Used to check whether the source directory contains a rose config.
        opts:
            Cylc option parser object - we want to extract the following
            values:
            - opt_conf_keys (list or str):
                Equivalent of ``rose suite-run --option KEY``
            - defines (list of str):
                Equivalent of ``rose suite-run --define KEY=VAL``
            - rose_template_vars (list of str):
                Equivalent of ``rose suite-run --define-suite KEY=VAL``
        rundir (pathlib.Path):
            Path to dump the rose-suite-cylc-install.conf to.

    Returns:
        cli_config - Config Node which has been dumped to
        ``rose-suite-cylc-install.conf``.
        rose_suite_conf['opts'] - Opts section of the config node dumped to
        installed ``rose-suite.conf``.
    """
    # Create a config based on command line options:
    cli_config = get_cli_opts_node(opts, srcdir)
    # Leave now if there is nothing to do:
    if not cli_config:
        return False

    # Construct path objects representing our target files.
    (Path(rundir) / 'opt').mkdir(exist_ok=True)
    conf_filepath = Path(rundir) / 'opt/rose-suite-cylc-install.conf'
    rose_conf_filepath = Path(rundir) / 'rose-suite.conf'
    dumper = ConfigDumper()
    loader = ConfigLoader()

    # If file exists we need to merge with our new config, over-writing with
    # new items where there are duplicates.
    if conf_filepath.is_file():
        if opts.clear_rose_install_opts:
            conf_filepath.unlink()
        else:
            oldconfig = loader.load(str(conf_filepath))
            cli_config = merge_rose_cylc_suite_install_conf(
                oldconfig, cli_config)

    # Get Values for standard ROSE variable ROSE_ORIG_HOST.
    rose_orig_host = get_host()
    for section in [
            'env', 'jinja2:suite.rc', 'empy:suite.rc', 'template variables'
    ]:
        if section in cli_config:
            cli_config[section].set(['ROSE_ORIG_HOST'], rose_orig_host)
            cli_config[section]['ROSE_ORIG_HOST'].comments = [
                ROSE_ORIG_HOST_INSTALLED_OVERRIDE_STRING
            ]

    cli_config.comments = [' This file records CLI Options.']
    identify_templating_section(cli_config)
    dumper.dump(cli_config, str(conf_filepath))

    # Merge the opts section of the rose-suite.conf with those set by CLI:
    if not rose_conf_filepath.is_file():
        rose_conf_filepath.touch()
    rose_suite_conf = loader.load(str(rose_conf_filepath))
    rose_suite_conf = add_cylc_install_to_rose_conf_node_opts(
        rose_suite_conf, cli_config)
    identify_templating_section(rose_suite_conf)
    dumper(rose_suite_conf, rose_conf_filepath)

    return cli_config, rose_suite_conf
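
A hedged usage sketch; the run directory and option values are invented, and opts carries only the attributes this function (via get_cli_opts_node) reads. See Example #4 for the kind of file this produces.

from pathlib import Path
from types import SimpleNamespace

opts = SimpleNamespace(
    opt_conf_keys='A B',
    defines=['[env]FOO=42'],
    rose_template_vars=['BAR=84'],
    clear_rose_install_opts=False,
)
cli_config, rose_suite_conf = record_cylc_install_options(
    rundir=Path('~/cylc-run/my-flow/run1').expanduser(),  # invented path
    opts=opts,
    srcdir=None,  # no source rose config checked in this sketch
)
# Writes <rundir>/opt/rose-suite-cylc-install.conf and updates the !opts line
# of <rundir>/rose-suite.conf.
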
Example #18
def get_rose_vars_from_config_node(config, config_node, environ):
    """Load template variables from a Rose config node.

    This uses only the provided config node and environment variables
    - there is no system interaction.

    Args:
        config (dict):
            Object which will be populated with the results.
        config_node (metomi.rose.config.ConfigNode):
            Configuration node representing the Rose suite configuration.
        environ (dict):
            Dictionary of environment variables

    """
    templating = None

    # Don't allow multiple templating sections.
    templating = identify_templating_section(config_node)

    if templating != 'template variables':
        config['templating_detected'] = templating.replace(':suite.rc', '')
    else:
        config['templating_detected'] = templating

    # Create env section if it doesn't already exist.
    if 'env' not in config_node.value:
        config_node.set(['env'])
    if templating not in config_node.value:
        config_node.set([templating])

    # Get Rose Orig host:
    rose_orig_host = get_host()

    # For each section process variables and add standard variables.
    for section in ['env', templating]:

        # This loop handles standard variables.
        # CYLC_VERSION - If it's in the config, remove it.
        # ROSE_VERSION - If it's in the config, replace it.
        # ROSE_ORIG_HOST - If it's in the config, replace it, unless it has a
        # comment marking it as having been saved by ``cylc install``.
        # In all cases warn users if the value in their config is not used.
        for var_name, replace_with in [('ROSE_ORIG_HOST', rose_orig_host),
                                       ('ROSE_VERSION', ROSE_VERSION),
                                       ('CYLC_VERSION', SET_BY_CYLC)]:
            # Warn if we're going to override a variable:
            if override_this_variable(config_node, section, var_name):
                user_var = config_node[section].value[var_name].value
                LOG.warning(
                    f'[{section}]{var_name}={user_var} from rose-suite.conf '
                    f'will be ignored: {var_name} will be: {replace_with}')

            # Handle replacement of stored variable if appropriate:
            if replace_with == SET_BY_CYLC:
                config_node[section].unset([var_name])
            elif not rose_orig_host_set_by_cylc_install(
                    config_node, section, var_name):
                config_node[section].set([var_name], replace_with)

        # Use env_var_process to process variables which may need expanding.
        for key, node in config_node.value[section].value.items():
            try:
                config_node.value[section].value[key].value = env_var_process(
                    node.value, environ=environ)
                if section == 'env':
                    environ[key] = node.value
            except UnboundEnvironmentVariableError as exc:
                raise ConfigProcessError(['env', key], node.value, exc)

    # For each of the template language sections extract items to a simple
    # dict to be returned.
    if 'env' in config_node.value:
        config['env'] = {
            item[0][1]: item[1].value
            for item in config_node.value['env'].walk()
        }
    if templating in config_node.value:
        config['template_variables'] = {
            item[0][1]: item[1].value
            for item in config_node.value[templating].walk()
        }
    elif 'template variables' in config_node.value:
        config['template_variables'] = {
            item[0][1]: item[1].value
            for item in config_node.value['template variables'].walk()
        }

    # Add the entire config to ROSE_SUITE_VARIABLES to allow for programmatic
    # access.
    if templating is not None:
        with patch_jinja2_leading_zeros():
            # BACK COMPAT: patch_jinja2_leading_zeros
            # back support zero-padded integers for a limited time to help
            # users migrate before upgrading cylc-flow to Jinja2>=3.1
            parser = Parser()
            for key, value in config['template_variables'].items():
                # The special variables are already Python variables.
                if key not in ['ROSE_ORIG_HOST', 'ROSE_VERSION', 'ROSE_SITE']:
                    try:
                        config['template_variables'][key] = (
                            parser.literal_eval(value))
                    except Exception:
                        raise ConfigProcessError(
                            [templating, key], value,
                            f'Invalid template variable: {value}'
                            '\nMust be a valid Python or Jinja2 literal'
                            ' (note strings "must be quoted").') from None

    # Add ROSE_SUITE_VARIABLES to config of templating engines in use.
    if templating is not None:
        config['template_variables']['ROSE_SUITE_VARIABLES'] = config[
            'template_variables']
Example #19
import os
import pytest
import shutil
import subprocess

from pathlib import Path
from shlex import split
from uuid import uuid4

from cylc.flow.pathutil import get_workflow_run_dir
from cylc.flow.hostuserutil import get_host

HOST = get_host().split('.')[0]


class SubprocessesError(Exception):
    ...


# Check that FCM is present on the system; skip these checks otherwise:
try:
    subprocess.run(['fcm', '--version'])
except FileNotFoundError:
    pytest.skip("\"FCM\" not installed", allow_module_level=True)


@pytest.fixture(scope='module')
def monkeymodule():
Example #20
 def test_is_remote_host_on_localhost(self):
     """is_remote_host with localhost."""
     self.assertFalse(is_remote_host(None))
     self.assertFalse(is_remote_host('localhost'))
     self.assertFalse(is_remote_host(os.getenv('HOSTNAME')))
     self.assertFalse(is_remote_host(get_host()))
Example #21
File: task_job_mgr.py Project: cylc/cylc
    def submit_task_jobs(self, suite, itasks, is_simulation=False):
        """Prepare and submit task jobs.

        Submit tasks where possible. Ignore tasks that are waiting for the
        host select command to complete, or tasks that are waiting for remote
        initialisation. A bad host select command, an error writing to a job
        file, or a bad remote initialisation marks the task as bad, leading to
        submission failure.

        This method uses prep_submit_task_job() as helper.

        Return (list): list of tasks that attempted submission.
        """
        if is_simulation:
            return self._simulation_submit_task_jobs(itasks)

        # Prepare tasks for job submission
        prepared_tasks, bad_tasks = self.prep_submit_task_jobs(suite, itasks)

        # Reset consumed host selection results
        self.task_remote_mgr.remote_host_select_reset()

        if not prepared_tasks:
            return bad_tasks

        # Group task jobs by (host, owner)
        auth_itasks = {}  # {(host, owner): [itask, ...], ...}
        for itask in prepared_tasks:
            auth_itasks.setdefault((itask.task_host, itask.task_owner), [])
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        # Submit task jobs for each (host, owner) group
        done_tasks = bad_tasks
        for (host, owner), itasks in sorted(auth_itasks.items()):
            is_init = self.task_remote_mgr.remote_init(host, owner)
            if is_init is None:
                # Remote is waiting to be initialised
                for itask in itasks:
                    itask.set_summary_message(self.REMOTE_INIT_MSG)
                continue
            # Ensure that localhost background/at jobs are recorded as running
            # on the host name of the current suite host, rather than just
            # "localhost". On suite restart on a different suite host, this
            # allows the restart logic to correctly poll the status of the
            # background/at jobs that may still be running on the previous
            # suite host.
            if (
                self.batch_sys_mgr.is_job_local_to_host(
                    itask.summary['batch_sys_name']) and
                not is_remote_host(host)
            ):
                owner_at_host = get_host()
            else:
                owner_at_host = host
            # Persist
            if owner:
                owner_at_host = owner + '@' + owner_at_host
            now_str = get_current_time_string()
            done_tasks.extend(itasks)
            for itask in itasks:
                # Log and persist
                LOG.info(
                    '[%s] -submit-num=%02d, owner@host=%s',
                    itask, itask.submit_num, owner_at_host)
                self.suite_db_mgr.put_insert_task_jobs(itask, {
                    'is_manual_submit': itask.is_manual_submit,
                    'try_num': itask.get_try_num(),
                    'time_submit': now_str,
                    'user_at_host': owner_at_host,
                    'batch_sys_name': itask.summary['batch_sys_name'],
                })
                itask.is_manual_submit = False
            if is_init == REMOTE_INIT_FAILED:
                # Remote has failed to initialise
                # Set submit-failed for all affected tasks
                for itask in itasks:
                    itask.local_job_file_path = None  # reset for retry
                    log_task_job_activity(
                        SubProcContext(
                            self.JOBS_SUBMIT,
                            '(init %s)' % owner_at_host,
                            err=REMOTE_INIT_FAILED,
                            ret_code=1),
                        suite, itask.point, itask.tdef.name)
                    self.task_events_mgr.process_message(
                        itask, CRITICAL,
                        self.task_events_mgr.EVENT_SUBMIT_FAILED)
                continue
            # Build the "cylc jobs-submit" command
            cmd = ['cylc', self.JOBS_SUBMIT]
            if LOG.isEnabledFor(DEBUG):
                cmd.append('--debug')
            if get_utc_mode():
                cmd.append('--utc-mode')
            remote_mode = False
            kwargs = {}
            for key, value, test_func in [
                    ('host', host, is_remote_host),
                    ('user', owner, is_remote_user)]:
                if test_func(value):
                    cmd.append('--%s=%s' % (key, value))
                    remote_mode = True
                    kwargs[key] = value
            if remote_mode:
                cmd.append('--remote-mode')
            cmd.append('--')
            cmd.append(glbl_cfg().get_derived_host_item(
                suite, 'suite job log directory', host, owner))
            # Chop itasks into a series of shorter lists if it's very big
            # to prevent overloading of stdout and stderr pipes.
            itasks = sorted(itasks, key=lambda itask: itask.identity)
            chunk_size = len(itasks) // ((len(itasks) // 100) + 1) + 1
            itasks_batches = [
                itasks[i:i + chunk_size] for i in range(0,
                                                        len(itasks),
                                                        chunk_size)]
            LOG.debug(
                '%s ... # will invoke in batches, sizes=%s',
                cmd, [len(b) for b in itasks_batches])
            for i, itasks_batch in enumerate(itasks_batches):
                stdin_files = []
                job_log_dirs = []
                for itask in itasks_batch:
                    if remote_mode:
                        stdin_files.append(
                            get_task_job_job_log(
                                suite, itask.point, itask.tdef.name,
                                itask.submit_num))
                    job_log_dirs.append(get_task_job_id(
                        itask.point, itask.tdef.name, itask.submit_num))
                    # The job file is now (about to be) used: reset the file
                    # write flag so that subsequent manual retrigger will
                    # generate a new job file.
                    itask.local_job_file_path = None
                    itask.state.reset_state(TASK_STATUS_READY)
                    if itask.state.outputs.has_custom_triggers():
                        self.suite_db_mgr.put_update_task_outputs(itask)
                self.proc_pool.put_command(
                    SubProcContext(
                        self.JOBS_SUBMIT,
                        cmd + job_log_dirs,
                        stdin_files=stdin_files,
                        job_log_dirs=job_log_dirs,
                        **kwargs
                    ),
                    self._submit_task_jobs_callback, [suite, itasks_batch])
        return done_tasks
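
The batching above caps job-submit invocations at roughly 100 tasks each while keeping the batches evenly sized; a worked illustration with 250 prepared tasks:

itasks = list(range(250))   # stand-ins for prepared tasks
chunk_size = len(itasks) // ((len(itasks) // 100) + 1) + 1   # == 84
itasks_batches = [
    itasks[i:i + chunk_size] for i in range(0, len(itasks), chunk_size)]
assert [len(batch) for batch in itasks_batches] == [84, 84, 82]
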
Example #23
def get_cli_opts_node(opts=None, srcdir=None):
    """Create a ConfigNode representing options set on the command line.

    Args:
        opts (CylcOptionParser object):
            Object with values from the command line.

    Returns:
        Rose ConfigNode.

    Example:
        >>> from types import SimpleNamespace
        >>> opts = SimpleNamespace(
        ...     opt_conf_keys='A B',
        ...     defines=["[env]FOO=BAR"],
        ...     rose_template_vars=["QUX=BAZ"]
        ... )
        >>> node = get_cli_opts_node(opts)
        >>> node['opts']
        {'value': 'A B', 'state': '!', 'comments': []}
        >>> node['env']['FOO']
        {'value': 'BAR', 'state': '', 'comments': []}
        >>> node['template variables']['QUX']
        {'value': 'BAZ', 'state': '', 'comments': []}
    """
    # Unpack info we want from opts:
    opt_conf_keys = []
    defines = []
    rose_template_vars = []
    if opts and 'opt_conf_keys' in dir(opts):
        opt_conf_keys = opts.opt_conf_keys
    if opts and 'defines' in dir(opts):
        defines = opts.defines
    if opts and 'rose_template_vars' in dir(opts):
        rose_template_vars = opts.rose_template_vars

    rose_orig_host = get_host()
    defines.append(f'[env]ROSE_ORIG_HOST={rose_orig_host}')
    rose_template_vars.append(f'ROSE_ORIG_HOST={rose_orig_host}')

    # Construct new output based on optional configs:
    newconfig = ConfigNode()

    # For each __define__ determine whether it is an env or template define.
    for define in defines:
        match = re.match((r'^\[(?P<key1>.*)\](?P<state>!{0,2})'
                          r'(?P<key2>.*)\s*=\s*(?P<value>.*)'),
                         define).groupdict()
        if match['key1'] == '' and match['state'] in ['!', '!!']:
            LOG.warning(
                'CLI opts set to ignored or trigger-ignored will be ignored.')
        else:
            newconfig.set(keys=[match['key1'], match['key2']],
                          value=match['value'],
                          state=match['state'])

    # For each __suite define__ add define.
    if srcdir is not None:
        config_node = rose_config_tree_loader(srcdir, opts).node
        templating = identify_templating_section(config_node)
    else:
        templating = 'template variables'

    for define in rose_template_vars:
        match = re.match(r'(?P<state>!{0,2})(?P<key>.*)\s*=\s*(?P<value>.*)',
                         define).groupdict()
        # Guess templating type?
        newconfig.set(keys=[templating, match['key']],
                      value=match['value'],
                      state=match['state'])

    # Specialised treatment of optional configs.
    if 'opts' not in newconfig:
        newconfig['opts'] = ConfigNode()
        newconfig['opts'].value = ''
    newconfig['opts'].value = merge_opts(newconfig, opt_conf_keys)
    newconfig['opts'].state = '!'

    return newconfig
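
For clarity, a small illustration of the '--define' pattern the regex above parses; the input string is invented.

import re

match = re.match(
    (r'^\[(?P<key1>.*)\](?P<state>!{0,2})'
     r'(?P<key2>.*)\s*=\s*(?P<value>.*)'),
    '[env]FOO=BAR',
).groupdict()
assert match == {'key1': 'env', 'state': '', 'key2': 'FOO', 'value': 'BAR'}
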
Example #24
    def submit_task_jobs(self,
                         suite,
                         itasks,
                         curve_auth,
                         client_pub_key_dir,
                         is_simulation=False):
        """Prepare for job submission and submit task jobs.

        Preparation (host selection, remote host init, and remote install)
        is done asynchronously. Newly released tasks may be sent here several
        times until these init subprocesses have returned. Failure during
        preparation is considered to be job submission failure.

        Once preparation has completed or failed, reset .waiting_on_job_prep in
        task instances so the scheduler knows to stop sending them back here.

        This method uses prep_submit_task_job() as helper.

        Return (list): list of tasks that attempted submission.
        """
        if is_simulation:
            return self._simulation_submit_task_jobs(itasks)

        # Prepare tasks for job submission
        prepared_tasks, bad_tasks = self.prep_submit_task_jobs(suite, itasks)

        # Reset consumed host selection results
        self.task_remote_mgr.subshell_eval_reset()

        if not prepared_tasks:
            return bad_tasks
        auth_itasks = {}  # {platform: [itask, ...], ...}
        for itask in prepared_tasks:
            platform_name = itask.platform['name']
            auth_itasks.setdefault(platform_name, [])
            auth_itasks[platform_name].append(itask)
        # Submit task jobs for each platform
        done_tasks = bad_tasks

        for platform_name, itasks in sorted(auth_itasks.items()):
            platform = itasks[0].platform
            install_target = get_install_target_from_platform(platform)
            ri_map = self.task_remote_mgr.remote_init_map

            if (ri_map.get(install_target) != REMOTE_FILE_INSTALL_DONE):
                if install_target == get_localhost_install_target():
                    # Skip init and file install for localhost.
                    LOG.debug(f"REMOTE INIT NOT REQUIRED for {install_target}")
                    ri_map[install_target] = (REMOTE_FILE_INSTALL_DONE)

                elif install_target not in ri_map:
                    # Remote init not in progress for target, so start it.
                    self.task_remote_mgr.remote_init(platform, curve_auth,
                                                     client_pub_key_dir)
                    for itask in itasks:
                        itask.set_summary_message(self.REMOTE_INIT_MSG)
                        self.data_store_mgr.delta_job_msg(
                            get_task_job_id(itask.point, itask.tdef.name,
                                            itask.submit_num),
                            self.REMOTE_INIT_MSG)
                    continue

                elif (ri_map[install_target] == REMOTE_INIT_DONE):
                    # Already done remote init so move on to file install
                    self.task_remote_mgr.file_install(platform)
                    continue

                elif (ri_map[install_target] in self.IN_PROGRESS.keys()):
                    # Remote init or file install in progress.
                    for itask in itasks:
                        msg = self.IN_PROGRESS[ri_map[install_target]]
                        itask.set_summary_message(msg)
                        self.data_store_mgr.delta_job_msg(
                            get_task_job_id(itask.point, itask.tdef.name,
                                            itask.submit_num), msg)
                    continue

            # Ensure that localhost background/at jobs are recorded as running
            # on the host name of the current suite host, rather than just
            # "localhost". On suite restart on a different suite host, this
            # allows the restart logic to correctly poll the status of the
            # background/at jobs that may still be running on the previous
            # suite host.
            host = get_host_from_platform(platform)
            if (self.job_runner_mgr.is_job_local_to_host(
                    itask.summary['job_runner_name'])
                    and not is_remote_platform(platform)):
                host = get_host()

            now_str = get_current_time_string()
            done_tasks.extend(itasks)
            for itask in itasks:
                # Log and persist
                LOG.info('[%s] -submit-num=%02d, host=%s', itask,
                         itask.submit_num, host)
                self.suite_db_mgr.put_insert_task_jobs(
                    itask, {
                        'is_manual_submit': itask.is_manual_submit,
                        'try_num': itask.get_try_num(),
                        'time_submit': now_str,
                        'platform_name': itask.platform['name'],
                        'job_runner_name': itask.summary['job_runner_name'],
                    })
                itask.is_manual_submit = False

            if (ri_map[install_target]
                    in [REMOTE_INIT_FAILED, REMOTE_FILE_INSTALL_FAILED]):
                # Remote init or install failed. Set submit-failed for all
                # affected tasks and remove target from remote init map
                # - this enables new tasks to re-initialise that target
                init_error = (ri_map[install_target])
                del ri_map[install_target]
                for itask in itasks:
                    itask.waiting_on_job_prep = False
                    itask.local_job_file_path = None  # reset for retry
                    log_task_job_activity(
                        SubProcContext(self.JOBS_SUBMIT,
                                       '(init %s)' % host,
                                       err=init_error,
                                       ret_code=1), suite, itask.point,
                        itask.tdef.name)
                    self._prep_submit_task_job_error(suite, itask,
                                                     '(remote init)', '')

                continue
            # Build the "cylc jobs-submit" command
            cmd = [self.JOBS_SUBMIT]
            if LOG.isEnabledFor(DEBUG):
                cmd.append('--debug')
            if get_utc_mode():
                cmd.append('--utc-mode')
            if is_remote_platform(itask.platform):
                remote_mode = True
                cmd.append('--remote-mode')
            else:
                remote_mode = False
            if itask.platform['clean job submission environment']:
                cmd.append('--clean-env')
            for var in itask.platform[
                    'job submission environment pass-through']:
                cmd.append(f"--env={var}")
            for path in itask.platform[
                    'job submission executable paths'] + SYSPATH:
                cmd.append(f"--path={path}")
            cmd.append('--')
            cmd.append(get_remote_suite_run_job_dir(platform, suite))
            # Chop itasks into a series of shorter lists if it's very big
            # to prevent overloading of stdout and stderr pipes.
            itasks = sorted(itasks, key=lambda itask: itask.identity)
            chunk_size = (len(itasks) // (
                (len(itasks) // platform['max batch submit size']) + 1) + 1)
            itasks_batches = [
                itasks[i:i + chunk_size]
                for i in range(0, len(itasks), chunk_size)
            ]
            LOG.debug('%s ... # will invoke in batches, sizes=%s', cmd,
                      [len(b) for b in itasks_batches])

            if remote_mode:
                cmd = construct_ssh_cmd(cmd, platform)
            else:
                cmd = ['cylc'] + cmd

            for i, itasks_batch in enumerate(itasks_batches):
                stdin_files = []
                job_log_dirs = []
                for itask in itasks_batch:
                    if remote_mode:
                        stdin_files.append(
                            os.path.expandvars(
                                get_task_job_job_log(suite, itask.point,
                                                     itask.tdef.name,
                                                     itask.submit_num)))
                    job_log_dirs.append(
                        get_task_job_id(itask.point, itask.tdef.name,
                                        itask.submit_num))
                    # The job file is now (about to be) used: reset the file
                    # write flag so that subsequent manual retrigger will
                    # generate a new job file.
                    itask.local_job_file_path = None
                    if itask.state.outputs.has_custom_triggers():
                        self.suite_db_mgr.put_update_task_outputs(itask)

                    itask.waiting_on_job_prep = False
                self.proc_pool.put_command(
                    SubProcContext(
                        self.JOBS_SUBMIT,
                        cmd + job_log_dirs,
                        stdin_files=stdin_files,
                        job_log_dirs=job_log_dirs,
                    ), self._submit_task_jobs_callback, [suite, itasks_batch])
        return done_tasks
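
The batch-size arithmetic near the end of the method above is easier to follow in isolation. The sketch below shows the same computation with illustrative names (chunk_tasks and max_batch_size are not cylc identifiers): the prepared tasks are split into near-equal batches, each no larger than the configured "max batch submit size", so that a single "cylc jobs-submit" call does not overload the stdout/stderr pipes.

# Minimal sketch of the batching logic, assuming only a plain Python list of
# tasks and an integer batch-size limit.
def chunk_tasks(itasks, max_batch_size):
    """Split itasks into near-equal batches of at most max_batch_size items."""
    if not itasks:
        return []
    n_batches = (len(itasks) // max_batch_size) + 1
    chunk_size = len(itasks) // n_batches + 1
    return [
        itasks[i:i + chunk_size]
        for i in range(0, len(itasks), chunk_size)
    ]

# For example, chunk_tasks(list(range(250)), 100) yields three batches of
# sizes 84, 84 and 82, each submitted by its own "cylc jobs-submit" call.
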
Example #25
0
    def submit_task_jobs(self, suite, itasks, is_simulation=False):
        """Prepare and submit task jobs.

        Submit tasks where possible. Ignore tasks that are waiting for a host
        select command to complete, or that are waiting for remote
        initialisation. A bad host select command, an error writing the job
        file, or a failed remote initialisation marks the task as bad and
        leads to submission failure.

        This method uses prep_submit_task_job() as helper.

        Return (list): list of tasks that attempted submission.
        """
        if is_simulation:
            return self._simulation_submit_task_jobs(itasks)

        # Prepare tasks for job submission
        prepared_tasks, bad_tasks = self.prep_submit_task_jobs(suite, itasks)

        # Reset consumed host selection results
        self.task_remote_mgr.remote_host_select_reset()

        if not prepared_tasks:
            return bad_tasks

        # Group task jobs by (host, owner)
        auth_itasks = {}  # {(host, owner): [itask, ...], ...}
        for itask in prepared_tasks:
            auth_itasks.setdefault((itask.task_host, itask.task_owner), [])
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        # Submit task jobs for each (host, owner) group
        done_tasks = bad_tasks
        for (host, owner), itasks in sorted(auth_itasks.items()):
            is_init = self.task_remote_mgr.remote_init(host, owner)
            if is_init is None:
                # Remote is waiting to be initialised
                for itask in itasks:
                    itask.set_summary_message(self.REMOTE_INIT_MSG)
                    self.job_pool.add_job_msg(
                        get_task_job_id(itask.point, itask.tdef.name,
                                        itask.submit_num),
                        self.REMOTE_INIT_MSG)
                continue
            # Ensure that localhost background/at jobs are recorded as running
            # on the host name of the current suite host, rather than just
            # "localhost". On suite restart on a different suite host, this
            # allows the restart logic to correctly poll the status of the
            # background/at jobs that may still be running on the previous
            # suite host.
            if (self.batch_sys_mgr.is_job_local_to_host(
                    itask.summary['batch_sys_name'])
                    and not is_remote_host(host)):
                owner_at_host = get_host()
            else:
                owner_at_host = host
            # Persist
            if owner:
                owner_at_host = owner + '@' + owner_at_host
            now_str = get_current_time_string()
            done_tasks.extend(itasks)
            for itask in itasks:
                # Log and persist
                LOG.info('[%s] -submit-num=%02d, owner@host=%s', itask,
                         itask.submit_num, owner_at_host)
                self.suite_db_mgr.put_insert_task_jobs(
                    itask, {
                        'is_manual_submit': itask.is_manual_submit,
                        'try_num': itask.get_try_num(),
                        'time_submit': now_str,
                        'user_at_host': owner_at_host,
                        'batch_sys_name': itask.summary['batch_sys_name'],
                    })
                itask.is_manual_submit = False
            if is_init == REMOTE_INIT_FAILED:
                # Remote has failed to initialise
                # Set submit-failed for all affected tasks
                for itask in itasks:
                    itask.local_job_file_path = None  # reset for retry
                    log_task_job_activity(
                        SubProcContext(self.JOBS_SUBMIT,
                                       '(init %s)' % owner_at_host,
                                       err=REMOTE_INIT_FAILED,
                                       ret_code=1), suite, itask.point,
                        itask.tdef.name)
                    self.task_events_mgr.process_message(
                        itask, CRITICAL,
                        self.task_events_mgr.EVENT_SUBMIT_FAILED)
                continue
            # Build the "cylc jobs-submit" command
            cmd = ['cylc', self.JOBS_SUBMIT]
            if LOG.isEnabledFor(DEBUG):
                cmd.append('--debug')
            if get_utc_mode():
                cmd.append('--utc-mode')
            remote_mode = False
            kwargs = {}
            for key, value, test_func in [('host', host, is_remote_host),
                                          ('user', owner, is_remote_user)]:
                if test_func(value):
                    cmd.append('--%s=%s' % (key, value))
                    remote_mode = True
                    kwargs[key] = value
            if remote_mode:
                cmd.append('--remote-mode')
            cmd.append('--')
            cmd.append(get_remote_suite_run_job_dir(host, owner, suite))
            # Chop itasks into a series of shorter lists if it's very big
            # to prevent overloading of stdout and stderr pipes.
            itasks = sorted(itasks, key=lambda itask: itask.identity)
            chunk_size = len(itasks) // ((len(itasks) // 100) + 1) + 1
            itasks_batches = [
                itasks[i:i + chunk_size]
                for i in range(0, len(itasks), chunk_size)
            ]
            LOG.debug('%s ... # will invoke in batches, sizes=%s', cmd,
                      [len(b) for b in itasks_batches])
            for i, itasks_batch in enumerate(itasks_batches):
                stdin_files = []
                job_log_dirs = []
                for itask in itasks_batch:
                    if remote_mode:
                        stdin_files.append(
                            get_task_job_job_log(suite, itask.point,
                                                 itask.tdef.name,
                                                 itask.submit_num))
                    job_log_dirs.append(
                        get_task_job_id(itask.point, itask.tdef.name,
                                        itask.submit_num))
                    # The job file is now (about to be) used: reset the file
                    # write flag so that subsequent manual retrigger will
                    # generate a new job file.
                    itask.local_job_file_path = None
                    itask.state.reset(TASK_STATUS_READY)
                    if itask.state.outputs.has_custom_triggers():
                        self.suite_db_mgr.put_update_task_outputs(itask)
                self.proc_pool.put_command(
                    SubProcContext(self.JOBS_SUBMIT,
                                   cmd + job_log_dirs,
                                   stdin_files=stdin_files,
                                   job_log_dirs=job_log_dirs,
                                   **kwargs), self._submit_task_jobs_callback,
                    [suite, itasks_batch])
        return done_tasks
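
The grouping step near the top of this method (auth_itasks) buckets the prepared tasks by their (host, owner) pair so that each group can be submitted with one "cylc jobs-submit" invocation. A minimal standalone sketch, using a stand-in Task type and collections.defaultdict in place of the explicit setdefault() calls (group_by_auth and Task are illustrative names, not cylc identifiers):

from collections import defaultdict, namedtuple

# Stand-in for a cylc task proxy; only the attributes used here are modelled.
Task = namedtuple('Task', 'identity task_host task_owner')

def group_by_auth(prepared_tasks):
    """Return {(host, owner): [task, ...], ...} for per-group submission."""
    auth_itasks = defaultdict(list)
    for itask in prepared_tasks:
        auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
    return dict(auth_itasks)

tasks = [
    Task('a.20200101T00', 'hpc1', 'alice'),
    Task('b.20200101T00', 'hpc1', 'alice'),
    Task('c.20200101T00', 'localhost', None),
]
# group_by_auth(tasks) -> two groups: ('hpc1', 'alice') with two tasks and
# ('localhost', None) with one task.
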
Example #26
0
    def test_is_remote_host_on_localhost(self):
        """is_remote_host with localhost."""
        self.assertFalse(is_remote_host(None))
        self.assertFalse(is_remote_host('localhost'))
        self.assertFalse(is_remote_host(os.getenv('HOSTNAME')))
        self.assertFalse(is_remote_host(get_host()))
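
The assertions above only hold if is_remote_host() treats None, 'localhost', the value of $HOSTNAME and get_host() as local. A rough sketch of that behaviour, assuming "local" means "resolves to the same address as the current machine" (this is an illustration only, not cylc's actual implementation, and is_remote_host_sketch is a made-up name):

import socket

def is_remote_host_sketch(name):
    """Return False for None, 'localhost' and names of the current host."""
    if not name or name == 'localhost':
        return False
    try:
        return (socket.gethostbyname(name)
                != socket.gethostbyname(socket.getfqdn()))
    except socket.error:
        # Names that do not resolve are treated as remote in this sketch.
        return True
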