Ejemplo n.º 1
0
    def __init__(self, cached=False):
        """Read the run host settings out of the global configuration.

        Args:
            cached (bool): passed on to ``glbl_cfg``; if False a new config
                instance with the up-to-date configuration is returned.
        """
        cfg = glbl_cfg(cached=cached)

        # condemned hosts may be suffixed with `!`, strip it before resolving
        condemned_fqdns = []
        for entry in cfg.get(['suite servers', 'condemned hosts']):
            condemned_fqdns.append(get_fqdn_by_host(entry.split('!')[0]))

        # fall back to localhost when no run hosts are configured
        candidates = cfg.get(['suite servers', 'run hosts'])
        if not candidates:
            candidates = ['localhost']

        # keep the run hosts which resolve and which are not condemned
        self.hosts = []
        for name in candidates:
            try:
                fqdn = get_fqdn_by_host(name)
            except socket.gaierror:
                # the host name could not be resolved, leave it out
                continue
            if fqdn not in condemned_fqdns:
                self.hosts.append(name)

        # ranking method and acceptance thresholds, if configured
        self.rank_method = cfg.get(
            ['suite servers', 'run host select', 'rank'])
        self.parsed_thresholds = self.parse_thresholds(
            cfg.get(['suite servers', 'run host select', 'thresholds']))
Ejemplo n.º 2
0
    def __init__(self, cached=False):
        """Set up the host list and selection settings from the global config.

        Args:
            cached (bool): handed to ``glbl_cfg``; with False a new config
                instance carrying the up-to-date configuration is returned.
        """
        settings = glbl_cfg(cached=cached)

        # resolve the condemned hosts, ignoring anything from a `!` onwards
        condemned_entries = settings.get(['suite servers', 'condemned hosts'])
        condemned_fqdns = [
            get_fqdn_by_host(entry.split('!')[0])
            for entry in condemned_entries
        ]

        # configured run hosts, or localhost when none are configured
        run_hosts = settings.get(['suite servers', 'run hosts'])
        if not run_hosts:
            run_hosts = ['localhost']

        # drop hosts which fail to resolve or which are condemned
        self.hosts = []
        for run_host in run_hosts:
            try:
                resolved = get_fqdn_by_host(run_host)
            except socket.gaierror:
                continue
            if resolved in condemned_fqdns:
                continue
            self.hosts.append(run_host)

        # server ranking and acceptance thresholds, if configured
        self.rank_method = settings.get(
            ['suite servers', 'run host select', 'rank'])
        self.parsed_thresholds = self.parse_thresholds(
            settings.get(['suite servers', 'run host select', 'thresholds']))
Ejemplo n.º 3
0
 def test_get_fqdn_by_host_on_bad_host(self):
     """get_fqdn_by_host raises IOError naming the host it cannot resolve."""
     bad_host = 'nosuchhost.nosuchdomain.org'
     # assertRaises fails the test when nothing is raised; a plain
     # try/except would pass silently in that case.
     with self.assertRaises(IOError) as ctx:
         get_fqdn_by_host(bad_host)
     exc = ctx.exception
     self.assertEqual(exc.filename, bad_host)
     self.assertEqual(
         "[Errno -2] Name or service not known: '%s'" % bad_host,
         str(exc))
Ejemplo n.º 4
0
 def test_get_fqdn_by_host_on_bad_host(self):
     """get_fqdn_by_host raises IOError naming the host it cannot resolve."""
     bad_host = 'nosuchhost.nosuchdomain.org'
     # Use the context manager so that the test fails if no exception is
     # raised, rather than skipping the assertions without any failure.
     with self.assertRaises(IOError) as ctx:
         get_fqdn_by_host(bad_host)
     raised = ctx.exception
     self.assertEqual(raised.filename, bad_host)
     self.assertEqual(
         "[Errno -2] Name or service not known: '%s'" % bad_host,
         str(raised))
Ejemplo n.º 5
0
    def test_get_fqdn_by_host_on_bad_host(self):
        """get_fqdn_by_host raises IOError naming the host it cannot resolve.

        The message is matched against two alternatives:
        "[Errno -2] Name or service not known" and
        "[Errno 8] nodename nor servname provided, or not known".
        """
        import re  # local import, only needed to escape the host name

        bad_host = 'nosuchhost.nosuchdomain.org'
        # escape the host name so that its dots only match literal dots
        with self.assertRaisesRegex(
                IOError, r"(\[Errno -2\] Name or service|"
                r"\[Errno 8\] nodename nor servname provided, or)"
                r" not known: '{}'".format(re.escape(bad_host))) as ctx:
            get_fqdn_by_host(bad_host)

        self.assertEqual(ctx.exception.filename, bad_host)
Ejemplo n.º 6
0
def get_location(suite: str, owner: str, host: str):
    """Work out the host and ports of a suite from its contact file.

    The ``host`` argument is used when it is truthy, otherwise the host
    recorded in the contact file is used; either way the name is passed
    through ``get_fqdn_by_host``.

    Args:
        suite (str): suite name
        owner (str): owner of the suite
        host (str): host name
    Returns:
        Tuple[str, int, int]: the host name, the port and the publish port.
    Raises:
        ClientError: if loading the contact file raises
            ``SuiteServiceFileError`` (e.g. the suite is not running).
    """
    try:
        contact_info = load_contact_file(suite, owner, host)
    except SuiteServiceFileError:
        message = (f'Contact info not found for suite '
                   f'"{suite}", suite not running?')
        raise ClientError(message)

    # an explicitly given host takes precedence over the contact file
    fqdn = get_fqdn_by_host(host or contact_info[ContactFileFields.HOST])

    return (
        fqdn,
        int(contact_info[ContactFileFields.PORT]),
        int(contact_info[ContactFileFields.PUBLISH_PORT]),
    )
Ejemplo n.º 7
0
Archivo: client.py Proyecto: cylc/cylc
    def get_location(cls, suite: str, owner: str, host: str):
        """Work out the host and port of a suite from its contact file.

        The ``host`` argument is used when it is truthy, otherwise the host
        recorded in the contact file is used; either way the name is passed
        through ``get_fqdn_by_host``.

        Args:
            suite (str): suite name
            owner (str): owner of the suite
            host (str): host name
        Returns:
            Tuple[str, int]: the host name and the port number.
        Raises:
            ClientError: if loading the contact file raises
                ``SuiteServiceFileError`` (e.g. the suite is not running).
        """
        files_manager = SuiteSrvFilesManager()
        try:
            contact_info = files_manager.load_contact_file(suite, owner, host)
        except SuiteServiceFileError:
            message = (f'Contact info not found for suite '
                       f'"{suite}", suite not running?')
            raise ClientError(message)

        # an explicitly given host takes precedence over the contact file
        fqdn = get_fqdn_by_host(
            host or contact_info[SuiteSrvFilesManager.KEY_HOST])

        return fqdn, int(contact_info[SuiteSrvFilesManager.KEY_PORT])
Ejemplo n.º 8
0
def get_location(workflow: str):
    """Work out the host and ports of a workflow from its contact file.

    Args:
        workflow (str): workflow name
    Returns:
        Tuple[str, int, int]: the host name, the port and the publish port.
    Raises:
        WorkflowStopped: if loading the contact file raises
            ``ServiceFileError``.
            NOTE(review): the earlier docstring listed ``ClientError`` here,
            presumably a parent class of ``WorkflowStopped`` - confirm.
        CylcVersionError: if the contact file has no publish port entry
            (presumably a Cylc 7 or earlier workflow).
    """
    try:
        contact_info = load_contact_file(workflow)
    except ServiceFileError:
        raise WorkflowStopped(workflow)

    fqdn = get_fqdn_by_host(contact_info[ContactFileFields.HOST])
    port = int(contact_info[ContactFileFields.PORT])

    if ContactFileFields.PUBLISH_PORT not in contact_info:
        # report the version recorded in the contact file, if there is one
        version = None
        if 'CYLC_VERSION' in contact_info:
            version = contact_info['CYLC_VERSION']
        raise CylcVersionError(version=version)

    return fqdn, port, int(contact_info[ContactFileFields.PUBLISH_PORT])
Ejemplo n.º 9
0
def test_get_fqdn_by_host_on_bad_host():
    """get_fqdn_by_host raises IOError naming the host it cannot resolve.

    The message is matched against two alternatives:
    "[Errno -2] Name or service not known" and
    "[Errno 8] nodename nor servname provided, or not known".

    Warning:
        This test can fail due to ISP/network configuration
        (for example ISP may reroute failed DNS to custom search page)
        e.g: https://www.virginmedia.com/help/advanced-network-error-search

    """
    bad_host = 'nosuchhost.nosuchdomain.org'
    with pytest.raises(IOError) as exc:
        get_fqdn_by_host(bad_host)
    # escape the host name so that its dots only match literal dots
    assert re.match(
        r"(\[Errno -2\] Name or service|"
        r"\[Errno 8\] nodename nor servname provided, or)"
        r" not known: '{}'".format(re.escape(bad_host)), str(exc.value))
    assert exc.value.filename == bad_host
Ejemplo n.º 10
0
def test_should_auto_restart(
        host,
        stop_mode,
        condemned_hosts,
        auto_restart_time,
        should_auto_restart
):
    """Check the auto-restart decision against the expected outcome."""
    # stand-in scheduler exposing the attributes set below
    scheduler_attrs = {
        'host': get_fqdn_by_host(host),
        'stop_mode': stop_mode,
        'auto_restart_time': auto_restart_time,
    }
    scheduler = Mock(**scheduler_attrs)

    # stand-in configuration: every lookup returns the condemned hosts
    cfg = Mock()
    cfg.get = lambda keys: condemned_hosts

    outcome = _should_auto_restart(scheduler, cfg)
    assert outcome == should_auto_restart
Ejemplo n.º 11
0
def _should_auto_restart(scheduler, current_glbl_cfg):
    # check if workflow host is condemned - if so auto restart
    if scheduler.stop_mode is None:
        for host in current_glbl_cfg.get(
            ['scheduler', 'run hosts', 'condemned']):
            if host.endswith('!'):
                # host ends in an `!` -> force shutdown mode
                mode = AutoRestartMode.FORCE_STOP
                host = host[:-1]
            else:
                # normal mode (stop and restart the workflow)
                mode = AutoRestartMode.RESTART_NORMAL
                if scheduler.auto_restart_time is not None:
                    # workflow is already scheduled to stop-restart only
                    # AutoRestartMode.FORCE_STOP can override this.
                    continue

            if get_fqdn_by_host(host) == scheduler.host:
                # this host is condemned, take the appropriate action

                return mode
    return False
Ejemplo n.º 12
0
def get_location(workflow: str):
    """Work out the host and ports of a workflow from its contact file.

    Args:
        workflow (str): workflow name
    Returns:
        Tuple[str, int, int]: the host name, the port and the publish port.
    Raises:
        WorkflowStopped: if loading the contact file raises
            ``ServiceFileError``.
            NOTE(review): the earlier docstring listed ``ClientError`` here,
            presumably a parent class of ``WorkflowStopped`` - confirm.
    """
    try:
        contact_info = load_contact_file(workflow)
    except ServiceFileError:
        raise WorkflowStopped(workflow)

    # the host recorded in the contact file, passed through get_fqdn_by_host
    fqdn = get_fqdn_by_host(contact_info[ContactFileFields.HOST])

    return (
        fqdn,
        int(contact_info[ContactFileFields.PORT]),
        int(contact_info[ContactFileFields.PUBLISH_PORT]),
    )
Ejemplo n.º 13
0
def select_host(hosts,
                ranking_string=None,
                blacklist=None,
                blacklist_name=None):
    """Choose one host out of the provided list.

    When no rankings are obtained from `ranking_string` the host is picked
    at random from the hosts left after blacklisting.

    Args:
        hosts (list):
            Host names to choose from.
            NOTE: Host names must be identifiable from the host where the
            call is executed.
        ranking_string (str):
            A multiline string of Python expressions, either to filter
            hosts by e.g::

               # only consider hosts with less than 70% cpu usage
               # and a server load of less than 5
               cpu_percent() < 70
               getloadavg()[0] < 5

            And / or to rank hosts by e.g::

               # rank by used cpu, then by load average as a tie-break
               # (lower scores are better)
               cpu_percent()
               getloadavg()

            Comments are allowed using `#` but not inline comments.
        blacklist (list):
            Host names to leave out of the selection.
            Short host names are fine (they do not have to be fqdn values).
        blacklist_name (str):
            The reason for blacklisting these hosts
            (used for exceptions).

    Raises:
        HostSelectException:
            If no hosts are available / meet the specified criterion.
        socket.gaierror:
            This may be raised in the event of unknown host names
            for some installations or not for others.

    Returns:
        tuple - (hostname, fqdn) the chosen host

        hostname (str):
            The hostname as provided to this function.
        fqdn (str):
            The fully qualified domain name of this host.

    """
    # map each fqdn to the name it was given as; hosts sharing an fqdn
    # collapse into a single key
    hostname_map = {}
    for name in hosts:
        hostname_map[get_fqdn_by_host(name)] = name
    candidates = list(hostname_map)

    # standardise the blacklist in the same way, without duplicates
    blacklisted = blacklist
    if blacklisted:
        blacklisted = list(set(map(get_fqdn_by_host, blacklisted)))

    # per host record of conditions and whether they were met
    # (for error reporting)
    data = {fqdn: {} for fqdn in candidates}

    if blacklisted:
        candidates, data = _filter_by_hostname(
            candidates, blacklisted, blacklist_name, data=data)

    if not candidates:
        # no hosts provided / left after filtering
        raise HostSelectException(data)

    rankings = list(_get_rankings(ranking_string)) if ranking_string else []

    if not rankings:
        # no metrics or ranking required, pick host at random
        chosen = random.choice(list(candidates))  # nosec
        return hostname_map[chosen], chosen

    # collect the metrics which the rankings require from each host
    metrics = list({metric for metric, _ in rankings})
    results, data = _get_metrics(candidates, metrics, data)
    # keep only hosts with results (some hosts might not be contactable)
    candidates = list(results)
    if not candidates:
        raise HostSelectException(data)

    # filter by rankings, sort by ranking
    candidates, data = _filter_by_ranking(
        candidates, rankings, results, data=data)
    if not candidates:
        # no hosts left after filtering
        raise HostSelectException(data)

    best = candidates[0]
    return hostname_map[best], best
Ejemplo n.º 14
0
NOTE: these are functional tests, for unit tests see the docstrings in
      the host_select module.

"""
import socket

import pytest

from cylc.flow.exceptions import HostSelectException
from cylc.flow.host_select import (select_host, select_suite_host)
from cylc.flow.hostuserutil import get_fqdn_by_host
from cylc.flow.parsec.exceptions import ListValueError

# Look up the primary host name and the alias list for "localhost"
# (the third item of the result is not used here), then its fqdn.
localhost, localhost_aliases, _ = socket.gethostbyname_ex('localhost')
localhost_fqdn = get_fqdn_by_host(localhost)

# NOTE: ensure that all localhost aliases are actually aliases of localhost,
#       it would appear that this is not always the case:
#       on Travis-CI one of the aliases has a different fqdn from the fqdn
#       of the host it is an alias of
localhost_aliases = [
    alias for alias in localhost_aliases
    if get_fqdn_by_host(alias) == localhost_fqdn
]


def test_localhost():
    """Selecting from localhost alone returns its name and fqdn."""
    selection = select_host([localhost])
    assert selection == (localhost, localhost_fqdn)
Ejemplo n.º 15
0
      the host_select module.

"""
from shlex import quote
import socket
from subprocess import call, DEVNULL

import pytest

from cylc.flow.cfgspec.glbl_cfg import glbl_cfg
from cylc.flow.exceptions import HostSelectException
from cylc.flow.host_select import (select_host, select_suite_host)
from cylc.flow.hostuserutil import get_fqdn_by_host

local_host, local_host_alises, _ = socket.gethostbyname_ex('localhost')
local_host_fqdn = get_fqdn_by_host(local_host)

try:
    # get a suitable remote host for running tests on
    # NOTE: do NOT copy this testing approach in other python tests
    remote_platform = glbl_cfg().get(
        ['platforms', '_remote_background_shared_tcp', 'hosts'], [])[0]
    # don't run tests unless host is contactable
    if call(['ssh', quote(remote_platform), 'hostname'],
            stdin=DEVNULL,
            stdout=DEVNULL,
            stderr=DEVNULL):
        raise KeyError('remote platform')
    # get the fqdn for this host
    remote_platform_fqdn = get_fqdn_by_host(remote_platform)
except (KeyError, IndexError):