def invoke_agent_expect_result(self, host, command, args=None):
    """Invoke an agent command and return the payload of its result wrapper.

    The agent is expected to answer with a dict holding either an 'error' or
    a 'result' key.  Answers from older agents (``None``, or a dict with
    neither key) are wrapped with ``agent_result`` before being validated.

    :param host: host to run the command on; ``host.fqdn`` is used in errors.
    :param command: name of the agent command to invoke.
    :param args: dict of arguments for the command (defaults to an empty dict).
    :return: the value stored under 'result' in the agent's answer.
    :raises AgentException: if the answer is not a dict, has neither
        'error' nor 'result', or carries an 'error'.
    """
    from chroma_core.services.job_scheduler.agent_rpc import AgentException

    # Build a fresh dict per call instead of sharing one mutable default.
    if args is None:
        args = {}

    result = self.invoke_agent(host, command, args)

    # This case is to deal with upgrades, once every installation is using the new protocol then we should not allow this.
    # Once everything is 3.0 or later we will also have version information in the wrapper header.
    is_legacy_result = (result is None) or \
        ((type(result) is dict) and ('error' not in result) and ('result' not in result))

    if is_legacy_result:
        job_log.info("Invalid result %s fixed up on called to %s with args %s" % (result, command, args))

        # Legacy install_packages answers of the form {'scan_packages': data} are unwrapped here so that
        # all of the legacy fixups are in one place and can easily be removed.
        # The dict check matters: a None answer cannot be searched with 'in' and must simply be wrapped.
        if command == 'install_packages' and type(result) is dict and 'scan_packages' in result:
            result = agent_result(result['scan_packages'])
        else:
            result = agent_result(result)

    if type(result) is not dict:
        raise AgentException(host.fqdn, command, args, "Expected a dictionary but got a %s when calling %s" % (type(result), command))

    if ('error' not in result) and ('result' not in result):
        raise AgentException(host.fqdn, command, args, "Expected a dictionary with 'error' or 'result' in keys but got %s when calling %s" % (result, command))

    if 'error' in result:
        # Record the agent-side error before surfacing it to the caller.
        self.log(result['error'])
        raise AgentException(host.fqdn, command, args, result['error'])

    return result['result']
Exemple #2
0
def start_target(ha_label):
    """
    Bring the high availability target ``ha_label`` up.

    If the resource is already running it is moved to its primary node when
    it is running elsewhere; otherwise it is started and waited for.

    Return: Value using simple return protocol (the location of the target
    on success, an error message otherwise)
    """
    if not _resource_exists(ha_label):
        return agent_error("Target {} does not exist".format(ha_label))

    running_on = get_resource_location(ha_label)
    preferred = _find_resource_constraint(ha_label, True)

    if running_on:
        # Already started: only act if it is not on the primary node.
        if running_on != preferred:
            console_log.info(
                "Resource %s already started, moving to primary node %s",
                ha_label, preferred)
            move_error = _move_target(ha_label, preferred)
            if move_error:
                return agent_error(move_error)
            return agent_result(preferred)
        return agent_result(running_on)

    try:
        _res_set_started(ha_label, True)
        if _resource_exists(_zfs_name(ha_label)):
            _res_set_started(_zfs_name(ha_label), True)
            # the group is enabled as well, in case it had been disabled
            _res_set_started(_group_name(ha_label), True)

        if _wait_target(ha_label, True):
            running_on = get_resource_location(ha_label)
            if running_on:
                return agent_result(running_on)
            return agent_error(
                "Started {} but now can't locate it!".format(ha_label))

        # The wait failed: flip the resource back to stopped so a failed
        # mount is left in a sane state.
        _res_set_started(ha_label, False)
        return agent_error("Failed to start target {}".format(ha_label))

    except AgentShell.CommandExecutionError as err:
        details = (err.result.rc, err.command, err.result.stdout,
                   err.result.stderr)
        return agent_error("Error (%s) running '%s': '%s' '%s'" % details)
def get_corosync_autoconfig():
    """
    Work out a corosync configuration from the detected network rings.

    ring0 comes from ``get_shared_ring``; the ring1 network is generated from
    ring0 and then detected.

    :return: dictionary containing 'result' or 'error'.
    """
    ring0 = get_shared_ring()
    if not ring0:
        return agent_error("Failed to detect ring0 interface")

    ring1_address, ring1_prefixlen = generate_ring1_network(ring0)

    try:
        ring1 = detect_ring1(ring0, ring1_address, ring1_prefixlen)
    except RingDetectionError as detection_error:
        return agent_error(detection_error.message)

    # ring0 is reported as shared, ring1 as dedicated to corosync.
    interfaces = {
        ring0.name: {
            "dedicated": False,
            "ipaddr": ring0.ipv4_address,
            "prefix": ring0.ipv4_prefixlen,
        },
        ring1.name: {
            "dedicated": True,
            "ipaddr": ring1.ipv4_address,
            "prefix": ring1.ipv4_prefixlen,
        },
    }
    config = {"interfaces": interfaces, "mcast_port": ring1.mcastport}
    return agent_result(config)
    def get_corosync_autoconfig(self):
        """
        Build a corosync configuration from ``self.network_interfaces``.

        Interface names are derived from each interface's 'type' and
        'interface_no'; the first two interfaces are reported, the first as
        shared and the second as dedicated, both with a prefix of 24.

        :return: the configuration wrapped with ``agent_result``.
        """
        with self._lock:
            prefix_for_type = {'tcp': 'eth', 'o2ib': 'ib'}

            # One (name, address) pair per known interface, in mapping order.
            rings = [('%s%s' % (prefix_for_type[details['type']],
                                details['interface_no']), address)
                     for address, details in self.network_interfaces.items()]

            ring0_name, ring0_address = rings[0]
            ring1_name, ring1_address = rings[1]

            interfaces = {
                ring0_name: {
                    'dedicated': False,
                    'ipaddr': ring0_address,
                    'prefix': 24
                },
                ring1_name: {
                    'dedicated': True,
                    'ipaddr': ring1_address,
                    'prefix': 24
                }
            }

            return agent_result({
                'interfaces': interfaces,
                'mcast_port': self.state['corosync'].mcast_port
            })
Exemple #5
0
def get_corosync_autoconfig():
    """
    Detect ring0 and ring1 and describe them as a corosync configuration.

    :return: dictionary containing 'result' or 'error'.
    """
    ring0 = get_ring0()

    # Without ring0 there is nothing to derive ring1 from.
    if not ring0:
        return agent_error('Failed to detect ring0 interface')

    candidate_ipaddr, candidate_prefix = generate_ring1_network(ring0)

    try:
        ring1 = detect_ring1(ring0, candidate_ipaddr, candidate_prefix)
    except RingDetectionError as err:
        return agent_error(err.message)

    shared_ring = {
        'dedicated': False,
        'ipaddr': ring0.ipv4_address,
        'prefix': ring0.ipv4_prefixlen
    }
    dedicated_ring = {
        'dedicated': True,
        'ipaddr': ring1.ipv4_address,
        'prefix': ring1.ipv4_prefixlen
    }

    return agent_result({
        'interfaces': {
            ring0.name: shared_ring,
            ring1.name: dedicated_ring
        },
        'mcast_port': ring1.mcastport
    })
Exemple #6
0
def start_target(ha_label):
    '''
    Start the high availability target by setting its target-role to
    Started, retrying when the resource is reported as failed.

    Return: Value using simple return protocol
    '''
    # HYD-1989: brute force retry of the start.
    # NOTE(review): the loop gives up on the fourth failed attempt, although
    # the original comment spoke of "up to 3 times" - confirm which is meant.
    attempt = 0
    while True:
        attempt += 1

        start_error = AgentShell.run_canned_error_message([
            'crm_resource', '-r', ha_label, '-p', 'target-role', '-m', '-v',
            'Started'
        ])
        if start_error:
            return agent_error(start_error)

        # give the resource time to come up
        _wait_target(ha_label, True)

        # then check crm_mon to see whether the start actually succeeded
        _, stdout, _ = AgentShell.run_old(['crm_mon', '-1'])

        # Success needs at least one line for this label without FAILED;
        # no matching line at all counts as a failure.
        started_cleanly = any(
            line.lstrip().startswith(ha_label) and line.find("FAILED") < 0
            for line in stdout.split("\n"))

        if started_cleanly:
            location = get_resource_location(ha_label)
            if location:
                return agent_result(location)
            return agent_error("Started %s but now can't locate it!" %
                               ha_label)

        # try to leave things in a sane state for a failed mount
        stop_error = AgentShell.run_canned_error_message([
            'crm_resource', '-r', ha_label, '-p', 'target-role', '-m',
            '-v', 'Stopped'
        ])
        if stop_error:
            return agent_error(stop_error)

        if attempt >= 4:
            return agent_error("Failed to start target %s" % ha_label)

        console_log.info("failed to start target %s" % ha_label)
    def install_packages(self, repos, packages):
        """
        Record each requested package in ``self.state['packages']`` using the
        entry the simulator lists as available for this node type, persist
        the state and return a package scan.

        :param repos: not used by this implementation.
        :param packages: iterable of package names to install.
        :return: ``self.scan_packages()`` wrapped with ``agent_result``.
        :raises RuntimeError: if a lookup for a package raises ``KeyError``.
        """
        for name in packages:
            try:
                available = self._simulator.available_packages(self.node_type)
                self.state['packages'][name] = available[name]
            except KeyError:
                # Report what is available to make the failure actionable.
                listing = self._simulator.available_packages(self.node_type)
                raise RuntimeError(
                    "Package '%s' not found (available: %s)!" %
                    (name, listing))

        self.save()

        report = self.scan_packages()
        return agent_result(report)
Exemple #8
0
    def _fake_invoke_agent(self, host, invoke, args=None):
        """
        Stand-in for an agent invocation: record the call in
        ``self._invokes_history`` and answer from the matching prepared
        invoke returned by ``self._get_executable_invoke``.

        :param host: host being invoked; only ``host.fqdn`` is read.
        :param invoke: name of the command being invoked.
        :param args: dict of arguments (``None`` means no arguments).
        :return: an agent error wrapper if the prepared invoke has an error,
            a result wrapper if it has a result, else ``agent_result_ok``.
        """
        if args is None:
            args = {}

        assert type(args) is dict, "args list must be dict :%s" % type(args)

        invocation = InvokeAgentInvoke(host.fqdn, invoke, args, None, None)
        self._invokes_history.append(invocation)

        prepared = self._get_executable_invoke(invocation)
        # Each prepared answer carries a count of executions left.
        prepared.executions_remaining -= 1

        if prepared.error:
            return agent_error(prepared.error)

        return agent_result(prepared.result) if prepared.result else agent_result_ok
Exemple #9
0
    def test_install_packages(self):
        self.add_commands(
            CommandCaptureCommand(('dnf', 'clean', 'all', '--enablerepo=*')),
            CommandCaptureCommand(
                ('dnf', 'repoquery', '--latest-limit', '1', '--requires',
                 '--enablerepo=myrepo', 'foo', 'bar'),
                stdout="""/usr/bin/python
python >= 2.4
python(abi) = 2.6
yum >= 3.2.29
/bin/sh
kernel = 2.6.32-279.14.1.el6_lustre
lustre-backend-fs
        """),
            CommandCaptureCommand(
                ('dnf', 'install', '--allowerasing', '-y', '--exclude',
                 'kernel-debug', '--enablerepo=myrepo', 'foo', 'bar',
                 'kernel-2.6.32-279.14.1.el6_lustre')),
            CommandCaptureCommand((
                'dnf', 'repoquery',
                '--queryformat=%{name} %{version}-%{release}.%{arch} %{repoid}',
                '--upgrades', '--disablerepo=*', '--enablerepo=myrepo'),
                                  stdout="""
jasper-libs.x86_64                                                                             1.900.1-16.el6_6.3                                                                             myrepo
"""),
            CommandCaptureCommand(
                ('dnf', 'update', '--allowerasing', '-y', '--exclude',
                 'kernel-debug', '--enablerepo=myrepo', 'jasper-libs.x86_64')),
            CommandCaptureCommand(
                ('grubby', '--default-kernel'),
                stdout='/boot/vmlinuz-2.6.32-504.3.3.el6.x86_64'))

        def isfile(arg):
            return True

        with patch('os.path.isfile', side_effect=isfile):
            self.assertEqual(
                agent_updates.install_packages(['myrepo'], ['foo', 'bar']),
                agent_result({}))

        self.assertRanAllCommandsInOrder()
def install_packages(repos, packages):
    """
    Explicitly evaluate and install or update any specific-version dependencies and satisfy even if
    that involves installing an older package than is already installed.
    Primary use case is installing lustre-modules, which depends on a specific kernel package.

    The caller's ``packages`` list is not modified; the pinned dependencies are added to a private copy.

    :param repos: List of strings, yum repo names
    :param packages: List of strings, yum package names
    :return: package report of the format given by the lustre device plugin, or an agent error
             if the post-install check (_check_HYD4050) reports one
    """
    if packages != []:
        # Work on a copy: the pinned requirements appended below must not leak into the caller's list.
        to_install = list(packages)

        yum_util('clean')

        out = yum_util('requires', enablerepo=repos, packages=to_install)
        for requirement in [l.strip() for l in out.strip().split("\n")]:
            # Only "name = version" requirements are pinned; names containing ')' or '/' are skipped.
            match = re.match(r"([^\)/]*) = (.*)", requirement)
            if match:
                require_package, require_version = match.groups()
                to_install.append("%s-%s" % (require_package, require_version))

        yum_util('install', enablerepo=repos, packages=to_install)

        # So now we have installed the packages requested, we will also make sure that any installed packages we
        # have that are already installed are updated to our presumably better versions.
        update_packages = yum_check_update(repos)

        if update_packages:
            daemon_log.debug(
                "The following packages need update after we installed IML packages %s"
                % update_packages)
            yum_util('update', packages=update_packages, enablerepo=repos)

        error = _check_HYD4050()

        if error:
            return agent_error(error)

    return agent_result(lustre.scan_packages())
    def _call(cls, host, cmd, args):
        """
        Mock implementation of an agent invocation.

        Records the call on the class, fails it when the class is configured
        to fail (globally or for this exact ``(cmd, args)``), and otherwise
        returns a canned answer chosen by ``cmd``.  Exact command names are
        matched first, then shell snippets are matched by substring, so the
        order of the branches matters.

        :param host: host being invoked; ``fqdn``, ``address`` and ``id`` are
            read from it.  NOTE(review): the register_server branch uses
            ``host`` itself as the ``mock_servers`` key - confirm callers.
        :param cmd: agent command name, or a shell command line.
        :param args: dict of arguments for the command.
        :return: the canned answer for ``cmd`` (may be ``None``).
        :raises AssertionError: if ``cmd`` is not known to this mock.
        """
        cls.calls.append((cmd, args))
        cls.host_calls[host.fqdn].append((cmd, args))

        if not cls.succeed:
            cls._fail(host.fqdn)

        if (cmd, args) in cls.fail_commands:
            cls._fail(host.fqdn)

        mock_server = cls.mock_servers[host.address]

        log.info("invoke_agent %s %s %s" % (host, cmd, args))

        # This isn't really accurate because lnet is scanned asynchronously, but it is as close as we can get today
        # Fixme: Also I now think this is writing to the wrong thing and should be changing the mock_server entries.
        # to lnet_up, I guess the mock_server needs an lnet state really, rather than relying on nids present.
        if cmd == "load_lnet":
            synthetic_lnet_configuration(host, mock_server["nids"])
            return
        elif cmd == "device_plugin":
            # Only returns nid info today.
            return create_synthetic_device_info(host, mock_server,
                                                args["plugin"])
        elif cmd == "format_target":
            inode_size = None
            if "mkfsoptions" in args:
                # Honour an explicit "-I <size>" mkfs option if one was given.
                inode_arg = re.search(r"-I (\d+)", args["mkfsoptions"])
                if inode_arg:
                    inode_size = int(inode_arg.group(1))

            if inode_size is None:
                # A 'foo' value
                inode_size = 777

            return {
                "uuid": str(uuid.uuid1()),
                "inode_count": 666,
                "inode_size": inode_size,
                "filesystem_type": "ext4",
            }
        elif cmd == "stop_target":
            # The lookup result is not needed; it is kept because it
            # presumably raises when no target has this label.
            ManagedTarget.objects.get(ha_label=args["ha_label"])
            return agent_result_ok
        elif cmd == "start_target":
            target = ManagedTarget.objects.get(ha_label=args["ha_label"])
            return agent_result(target.primary_host.nodename)
        elif cmd == "register_target":
            # Assume mount paths are "/mnt/testfs-OST0001" style
            mount_point = args["mount_point"]
            label = re.search(r"/mnt/([^\s]+)", mount_point).group(1)
            return {"label": label}
        elif cmd == "detect_scan":
            return mock_server["detect-scan"]
        elif cmd == "install_packages":
            return agent_result([])
        elif cmd == "register_server":
            api_client = TestApiClient()
            old_is_authenticated = CsrfAuthentication.is_authenticated
            try:
                # Authentication is stubbed out for the duration of the
                # registration request and restored in the finally clause.
                CsrfAuthentication.is_authenticated = mock.Mock(
                    return_value=True)
                api_client.client.login(username="******",
                                        password="******")
                fqdn = cls.mock_servers[host]["fqdn"]

                response = api_client.post(
                    args["url"] + "register/%s/" % args["secret"],
                    data={
                        "address": host,
                        "fqdn": fqdn,
                        "nodename": cls.mock_servers[host]["nodename"],
                        "capabilities": ["manage_targets"],
                        "version": cls.version,
                        "csr": helper.generate_csr(fqdn),
                    },
                )
                assert response.status_code == 201
                registration_data = Serializer().deserialize(
                    response.content, format=response["Content-Type"])
                print("MockAgent.invoke returning %s" % registration_data)
                return registration_data
            finally:
                CsrfAuthentication.is_authenticated = old_is_authenticated
        elif cmd == "kernel_status":
            return {
                "running": "fake_kernel-0.1",
                "required": "fake_kernel-0.1",
                "available": ["fake_kernel-0.1"]
            }
        elif cmd == "selinux_status":
            return {"status": "Disabled"}
        elif cmd == "reboot_server":
            now = IMLDateTime.utcnow()
            log.info("rebooting %s; updating boot_time to %s" % (host, now))
            job_scheduler_notify.notify(host, now, {"boot_time": now})
        elif cmd == "which zfs":
            return 1
        elif "import platform;" in cmd:
            return "0"
        elif "socket.gethostbyname(socket.gethostname())" in cmd:
            if not mock_server["tests"]["hostname_valid"]:
                return "127.0.0.1"
            else:
                return mock_server["address"]
        elif "print os.uname()[1]" in cmd:
            return "%s\n%s" % (mock_server["nodename"], mock_server["fqdn"])
        elif "socket.getfqdn()" in cmd:
            return mock_server["fqdn"]
        elif "ping" in cmd:
            # 2 is added when reverse resolve fails, 1 when reverse ping fails.
            result = (0 if mock_server["tests"]["reverse_resolve"] else
                      2) + (0 if mock_server["tests"]["reverse_ping"] else 1)
            return result
        elif "ElectricFence" in cmd:
            return 0 if mock_server["tests"]["yum_can_update"] else 1
        elif "openssl version -a" in cmd:
            return 0 if mock_server["tests"]["openssl"] else 1
        elif "curl -k https" in cmd:
            return json.dumps({"host_id": host.id, "command_id": 0})
        elif cmd in [
                "configure_pacemaker",
                "unconfigure_pacemaker",
                "configure_target_store",
                "unconfigure_target_store",
                "deregister_server",
                "restart_agent",
                "shutdown_server",
                "host_corosync_config",
                "check_block_device",
                "set_conf_param",
                "purge_configuration",
        ]:
            # These commands answer with a bare None.
            return None
        elif cmd in [
                "configure_target_ha",
                "unconfigure_target_ha",
                "start_lnet",
                "stop_lnet",
                "unload_lnet",
                "unconfigure_lnet",
                "configure_corosync",
                "unconfigure_corosync",
                "start_corosync",
                "stop_corosync",
                "start_pacemaker",
                "stop_pacemaker",
                "configure_ntp",
                "unconfigure_ntp",
                "import_target",
                "export_target",
                "set_profile",
                "update_profile",
                "failover_target",
                "failback_target",
                "configure_network",
                "open_firewall",
                "close_firewall",
        ]:
            # These commands answer with agent_result_ok.
            return agent_result_ok
        elif cmd == "get_corosync_autoconfig":
            return agent_result({
                "interfaces": {
                    "eth0": {
                        "dedicated": False,
                        "ipaddr": "192.168.0.1",
                        "prefix": 24
                    },
                    "eth1": {
                        "dedicated": True,
                        "ipaddr": "10.10.0.01",
                        "prefix": 24
                    },
                },
                "mcast_port": "666",
            })
        else:
            # Raised explicitly so the check also fires when asserts are
            # stripped with -O.
            raise AssertionError(
                "The %s command is not in the known list for MockAgentRpc. Please add it then when people modify it a simple text search will let them know to change it here as well."
                % cmd)
Exemple #12
0
    def _call(cls, host, cmd, args):
        """
        Mock implementation of an agent invocation.

        Records the call on the class, fails it when the class is configured
        to fail (globally or for this exact ``(cmd, args)``), and otherwise
        returns a canned answer chosen by ``cmd``.  Exact command names are
        matched first, then shell snippets are matched by substring, so the
        order of the branches matters.

        :param host: host being invoked; ``fqdn``, ``address`` and ``id`` are
            read from it.  NOTE(review): ``host`` itself is used as the key
            for ``host_calls`` and, in the register_server branch, for
            ``mock_servers`` - confirm against callers.
        :param cmd: agent command name, or a shell command line.
        :param args: dict of arguments for the command.
        :return: the canned answer for ``cmd`` (may be ``None``).
        :raises AssertionError: if ``cmd`` is not known to this mock.
        """
        cls.calls.append((cmd, args))
        cls.host_calls[host].append((cmd, args))

        if not cls.succeed:
            cls._fail(host.fqdn)

        if (cmd, args) in cls.fail_commands:
            cls._fail(host.fqdn)

        mock_server = cls.mock_servers[host.address]

        log.info("invoke_agent %s %s %s" % (host, cmd, args))

        # This isn't really accurate because lnet is scanned asynchronously, but it is as close as we can get today
        # Fixme: Also I now think this is writing to the wrong thing and should be changing the mock_server entries.
        # to lnet_up, I guess the mock_server needs an lnet state really, rather than relying on nids present.
        if cmd == "load_lnet":
            synthetic_lnet_configuration(host, mock_server['nids'])
            return
        elif cmd == "device_plugin":
            # Only returns nid info today.
            return create_synthetic_device_info(host, mock_server,
                                                args['plugin'])
        elif cmd == 'format_target':
            inode_size = None
            if 'mkfsoptions' in args:
                # Honour an explicit "-I <size>" mkfs option if one was given.
                inode_arg = re.search(r"-I (\d+)", args['mkfsoptions'])
                if inode_arg:
                    inode_size = int(inode_arg.group(1))

            if inode_size is None:
                # A 'foo' value
                inode_size = 777

            return {
                'uuid': str(uuid.uuid1()),
                'inode_count': 666,
                'inode_size': inode_size,
                'filesystem_type': 'ext4'
            }
        elif cmd == 'stop_target':
            # The lookup result is not needed; it is kept because it
            # presumably raises when no target has this label.
            ManagedTarget.objects.get(ha_label=args['ha_label'])
            return agent_result_ok
        elif cmd == 'start_target':
            target = ManagedTarget.objects.get(ha_label=args['ha_label'])
            return agent_result(target.primary_host.nodename)
        elif cmd == 'register_target':
            # Assume mount paths are "/mnt/testfs-OST0001" style
            mount_point = args['mount_point']
            label = re.search(r"/mnt/([^\s]+)", mount_point).group(1)
            return {'label': label}
        elif cmd == 'detect_scan':
            return mock_server['detect-scan']
        elif cmd == 'install_packages':
            return agent_result([])
        elif cmd == 'register_server':
            api_client = TestApiClient()
            old_is_authenticated = CsrfAuthentication.is_authenticated
            try:
                # Authentication is stubbed out for the duration of the
                # registration request and restored in the finally clause.
                CsrfAuthentication.is_authenticated = mock.Mock(
                    return_value=True)
                api_client.client.login(username='******',
                                        password='******')
                fqdn = cls.mock_servers[host]['fqdn']

                response = api_client.post(
                    args['url'] + "register/%s/" % args['secret'],
                    data={
                        'address': host,
                        'fqdn': fqdn,
                        'nodename': cls.mock_servers[host]['nodename'],
                        'capabilities': ['manage_targets'],
                        'version': cls.version,
                        'csr': helper.generate_csr(fqdn)
                    })
                assert response.status_code == 201
                registration_data = Serializer().deserialize(
                    response.content, format=response['Content-Type'])
                # Parenthesised single argument: prints identically whether
                # print is a statement or a function.
                print("MockAgent.invoke returning %s" % registration_data)
                return registration_data
            finally:
                CsrfAuthentication.is_authenticated = old_is_authenticated
        elif cmd == 'kernel_status':
            return {
                'running': 'fake_kernel-0.1',
                'required': 'fake_kernel-0.1',
                'available': ['fake_kernel-0.1']
            }
        elif cmd == 'reboot_server':
            now = IMLDateTime.utcnow()
            log.info("rebooting %s; updating boot_time to %s" % (host, now))
            job_scheduler_notify.notify(host, now, {'boot_time': now})
        elif 'socket.gethostbyname(socket.gethostname())' in cmd:
            if not mock_server['tests']['hostname_valid']:
                return '127.0.0.1'
            else:
                return mock_server['address']
        elif 'print os.uname()[1]' in cmd:
            return '%s\n%s' % (mock_server['nodename'], mock_server['fqdn'])
        elif 'socket.getfqdn()' in cmd:
            return mock_server['fqdn']
        elif 'ping' in cmd:
            # 2 is added when reverse resolve fails, 1 when reverse ping fails.
            result = ((0 if mock_server['tests']['reverse_resolve'] else 2) +
                      (0 if mock_server['tests']['reverse_ping'] else 1))
            return result
        elif 'python-fedora-django' in cmd:
            return 0 if mock_server['tests']['yum_valid_repos'] else 1
        elif 'ElectricFence' in cmd:
            return 0 if mock_server['tests']['yum_can_update'] else 1
        elif 'curl -k https' in cmd:
            return json.dumps({'host_id': host.id, 'command_id': 0})
        elif cmd in [
                'configure_pacemaker', 'unconfigure_pacemaker',
                'configure_target_store', 'unconfigure_target_store',
                'deregister_server', 'restart_agent', 'shutdown_server',
                'host_corosync_config', 'check_block_device', 'set_conf_param',
                'purge_configuration'
        ]:
            # These commands answer with a bare None.
            return None
        elif cmd in [
                'configure_target_ha', 'unconfigure_target_ha', 'start_lnet',
                'stop_lnet', 'unload_lnet', 'unconfigure_lnet',
                'configure_corosync', 'unconfigure_corosync', 'start_corosync',
                'stop_corosync', 'start_pacemaker', 'stop_pacemaker',
                'configure_ntp', 'unconfigure_ntp', 'import_target',
                # The comma after 'export_target' is essential: without it the
                # literal is concatenated with 'set_profile' and that command
                # is no longer recognised.
                'export_target', 'set_profile', 'update_profile',
                'failover_target', 'failback_target', 'configure_network',
                'open_firewall', 'close_firewall'
        ]:
            # These commands answer with agent_result_ok.
            return agent_result_ok
        elif cmd == 'get_corosync_autoconfig':
            return agent_result({
                'interfaces': {
                    'eth0': {
                        'dedicated': False,
                        'ipaddr': '192.168.0.1',
                        'prefix': 24
                    },
                    'eth1': {
                        'dedicated': True,
                        'ipaddr': '10.10.0.01',
                        'prefix': 24
                    }
                },
                'mcast_port': '666'
            })
        else:
            # Raised explicitly so the check also fires when asserts are
            # stripped with -O.
            raise AssertionError(
                "The %s command is not in the known list for MockAgentRpc. Please add it then when people modify it a simple text search will let them know to change it here as well."
                % cmd)
Exemple #13
0
def start_target(ha_label):
    '''
    Start the high availability target using ``pcs resource enable``.

    A target that is already running is moved to its primary node if it is
    running elsewhere; otherwise the resource (plus its zfs and group
    resources, when they exist) is enabled and waited for, with retries.

    Return: Value using simple return protocol
    '''
    if not _resource_exists(ha_label):
        return agent_error("Target {} does not exist".format(ha_label))

    # if resource already started but not on primary, move it
    current = get_resource_location(ha_label)
    primary = _find_resource_constraint(ha_label, True)
    if current:
        if current != primary:
            console_log.info(
                "Resource %s already started, moving to primary node %s",
                ha_label, primary)
            move_error = _move_target(ha_label, primary)
            if move_error:
                return agent_error(move_error)
            return agent_result(primary)
        return agent_result(current)

    # HYD-1989: brute force retry of the start.
    # NOTE(review): the loop gives up on the fourth failed attempt, although
    # the original comment spoke of "up to 3 times" - confirm which is meant.
    attempt = 0
    while True:
        attempt += 1

        failure = AgentShell.run_canned_error_message(
            ['pcs', 'resource', 'enable', ha_label])
        if failure:
            return agent_error(failure)

        if _resource_exists(_zfs_name(ha_label)):
            failure = AgentShell.run_canned_error_message(
                ['pcs', 'resource', 'enable', _zfs_name(ha_label)])
            if failure:
                return agent_error(failure)

        if _resource_exists(_group_name(ha_label)):
            # enable group also, in case group was disabled
            failure = AgentShell.run_canned_error_message(
                ['pcs', 'resource', 'enable', _group_name(ha_label)])
            if failure:
                return agent_error(failure)

        # now wait for it to start
        if not _wait_target(ha_label, True):
            # try to leave things in a sane state for a failed mount
            failure = AgentShell.run_canned_error_message(
                ['pcs', 'resource', 'disable', ha_label])
            if failure:
                return agent_error(failure)

            if attempt >= 4:
                return agent_error(
                    "Failed to start target {}".format(ha_label))

            console_log.info("failed to start target %s", ha_label)
            continue

        current = get_resource_location(ha_label)
        if current:
            return agent_result(current)
        return agent_error(
            "Started {} but now can't locate it!".format(ha_label))
 def start_target(self, ha_label):
     """Start ``ha_label`` on the cluster and return its 'started_on' value
     wrapped with ``agent_result``."""
     started = self._cluster.start(ha_label)
     location = started['started_on']
     return agent_result(location)
 def unconfigure_target_ha(self, primary, ha_label, uuid):
     """Unconfigure ``ha_label`` on the cluster for this node and return the
     cluster's answer wrapped with ``agent_result`` (``uuid`` is unused)."""
     outcome = self._cluster.unconfigure(self.nodename, ha_label, primary)
     return agent_result(outcome)
 def configure_target_ha(self, primary, device, ha_label, uuid,
                         mount_point):
     """Configure ``ha_label`` on the cluster for this node and return the
     cluster's answer wrapped with ``agent_result``."""
     outcome = self._cluster.configure(
         self.nodename, device, ha_label, uuid, primary, mount_point)
     return agent_result(outcome)