def _lnet_state(self):
    lnet_up = False
    lnet_loaded = not bool(
        AgentShell.run(["udevadm", "info", "--path", "/sys/module/lnet"]).rc)

    if lnet_loaded:
        lnet_up = not bool(AgentShell.run(["lnetctl", "net", "show"]).rc)

    return {
        (False, False): "lnet_unloaded",
        (False, True): "lnet_unloaded",
        (True, False): "lnet_down",
        (True, True): "lnet_up",
    }[(lnet_loaded, lnet_up)]
def _resource_exists(ha_label):
    '''
    Check if a resource exists in current configuration.
    :return: True if exists
    '''
    result = AgentShell.run(["crm_resource", "-W", "-r", ha_label])
    return result.rc == 0
def _cibadmin(command_args, timeout=120, raise_on_timeout=False):
    assert timeout > 0, "timeout must be greater than zero"

    # I think these are "errno" values, but I'm not positive
    # but going forward, any additions to this should try to be informative
    # about the type of exit code and why it's OK to retry
    RETRY_CODES = {
        10: "something unknown",
        41: "something unknown",
        62: "Timer expired",
        107: "Transport endpoint is not connected",
    }

    command_args.insert(0, "cibadmin")
    # NB: This isn't a "true" timeout, in that it won't forcibly stop the
    # subprocess after a timeout. We'd need more invasive changes to
    # shell._run() for that.
    for _ in util.wait(timeout):
        result = AgentShell.run(command_args)

        if result.rc == 0:
            return result
        elif result.rc not in RETRY_CODES:
            break

    if raise_on_timeout and result.rc in RETRY_CODES:
        raise PacemakerError(
            "%s timed out after %d seconds: rc: %s, stderr: %s"
            % (" ".join(command_args), timeout, result.rc, result.stderr))

    return result
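# Hedged usage sketch (not part of the original source): the CIB scope and XML
# fragment below are illustrative assumptions; only the retry/timeout behaviour
# comes from _cibadmin() above. With raise_on_timeout=True a transient error that
# never clears (e.g. rc 107 while pacemaker is still starting) becomes a
# PacemakerError instead of being returned silently.
def _example_cib_update():
    result = _cibadmin(
        ["--modify", "--allow-create", "-o", "crm_config",
         "-X", '<cluster_property_set id="cib-bootstrap-options"/>'],
        timeout=60,
        raise_on_timeout=True)
    return result.rc == 0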
def unconfigure_corosync2(host_fqdn, mcast_port):
    """
    Unconfigure the corosync application.

    For corosync2 don't disable pcsd, just remove host node from cluster and
    disable corosync from auto starting (service should already be stopped in
    state transition)

    Note that pcs cluster commands handle editing and removal of the
    corosync.conf file

    Return: Value using simple return protocol
    """
    error = corosync_service.disable()
    if error:
        return agent_error(error)

    # Detect if we are the only node in the cluster, we want to do this before
    # the next command removes the conf file
    cluster_nodes = _nodes_in_cluster()

    result = AgentShell.run(["pcs", "--force", "cluster", "node", "remove", host_fqdn])

    if result.rc != 0:
        if "No such file or directory" in result.stderr:
            # we want to return successful if the configuration file does not exist
            console_log.warning(result.stderr)
        elif "Error: Unable to update any nodes" in result.stderr:
            # this error is expected when this is the last node in the cluster
            if len(cluster_nodes) != 1:
                return agent_error(result.stderr)
        else:
            return agent_error(result.stderr)

    return agent_ok_or_error(
        firewall_control.remove_rule(PCS_TCP_PORT, "tcp", "pcs", persist=True)
        or firewall_control.remove_rule(mcast_port, "udp", "corosync", persist=True)
    )
def kernel_status():
    """
    :return: {'running': 'kernel-X.Y.Z',
              'required': <'kernel-A.B.C' or None>,
              'available': ['kernel-...', ...]}
    """
    running_kernel = "kernel-%s" % AgentShell.try_run(["uname", "-r"]).strip()

    available_kernels = [
        k for k in AgentShell.try_run(["rpm", "-q", "kernel"]).split("\n") if k
    ]

    if AgentShell.run(["rpm", "-q", "--whatprovides", "kmod-lustre"]).rc == 0:
        try:
            modlist = [
                os.path.splitext(os.path.basename(k))[0]
                for k in AgentShell.try_run([
                    "rpm", "-ql", "--whatprovides", "lustre-osd", "kmod-lustre"
                ]).split("\n")
                if k.endswith(".ko")
            ]
            required_kernel = latest_kernel(available_kernels, modlist)
        except (AgentShell.CommandExecutionError, StopIteration):
            required_kernel = None
    elif AgentShell.run(["rpm", "-q", "kmod-lustre-client"]).rc == 0:
        # but on a worker, we can ask kmod-lustre-client what the required
        # kernel is
        try:
            modlist = [
                os.path.splitext(os.path.basename(k))[0]
                for k in AgentShell.try_run([
                    "rpm", "-ql", "--whatprovides", "kmod-lustre-client"
                ]).split("\n")
                if k.endswith(".ko")
            ]
            required_kernel = latest_kernel(available_kernels, modlist)
        except (AgentShell.CommandExecutionError, StopIteration):
            required_kernel = None
    else:
        required_kernel = None

    return {
        "running": running_kernel,
        "required": required_kernel,
        "available": available_kernels,
    }
def _full_scan(self):
    # If we are a worker node then return nothing because our devices are not of
    # interest. This is a short term solution for HYD-3140. This plugin should
    # really not be loaded if it is not needed, but for now this sorts out an
    # issue with PluginAgentResources being in the linux plugin.
    if config.get('settings', 'profile')['worker']:
        return {}

    # Before we do anything do a partprobe, this will ensure that everything gets
    # an up to date view of the device partitions. partprobe might throw errors
    # so ignore the return value
    AgentShell.run(["partprobe"])

    # Map of block devices major:minors to /dev/ path.
    block_devices = BlockDevices()

    # Devicemapper: LVM and Multipath
    dmsetup = DmsetupTable(block_devices)

    # Software RAID
    mds = MdRaid(block_devices).all()

    # _zpools
    zfs_devices = ZfsDevices()
    zfs_devices.full_scan(block_devices)

    # EMCPower Devices
    emcpowers = EMCPower(block_devices).all()

    # Local filesystems (not lustre) in /etc/fstab or /proc/mounts
    local_fs = LocalFilesystems(block_devices).all()

    # We have scanned devices, so set the devices scanned flag.
    LinuxDevicePlugin.devices_scanned = True

    return {
        "vgs": dmsetup.vgs,
        "lvs": dmsetup.lvs,
        "zfspools": zfs_devices.zpools,
        "zfsdatasets": zfs_devices.datasets,
        "zfsvols": zfs_devices.zvols,
        "mpath": dmsetup.mpaths,
        "devs": block_devices.block_device_nodes,
        "local_fs": local_fs,
        'emcpower': emcpowers,
        'mds': mds,
    }
def convert_targets(force=False):
    '''
    Convert existing ocf:chroma:Target to ZFS + Lustre
    '''
    try:
        result = AgentShell.run(['cibadmin', '--query'])
    except OSError, err:
        if err.errno != errno.ENOENT:
            raise
def kernel_status():
    """
    :return: {'running': 'kernel-X.Y.Z',
              'required': <'kernel-A.B.C' or None>,
              'available': ['kernel-...', ...]}
    """
    running_kernel = "kernel-%s" % AgentShell.try_run(["uname", "-r"]).strip()

    if AgentShell.run(["rpm", "-q", "--whatprovides", "kmod-lustre"]).rc == 0:
        # on a server, a required kernel is a lustre patched kernel since we
        # are building storage servers that can support both ldiskfs and zfs
        try:
            required_kernel = \
                next(k for k in sorted(AgentShell.try_run(["rpm", "-q", "kernel"]).split('\n'),
                                       reverse=True)
                     if "_lustre" in k)
        except (AgentShell.CommandExecutionError, StopIteration):
            required_kernel = None
    elif AgentShell.run(["rpm", "-q", "kmod-lustre-client"]).rc == 0:
        # but on a worker, we can ask kmod-lustre-client what the required
        # kernel is
        try:
            required_kernel_prefix = \
                next(k for k in AgentShell.try_run(["rpm", "-q", "--requires",
                                                    "kmod-lustre-client"]).split('\n')
                     if "kernel >=" in k).split(" >= ")[1]
            required_kernel = AgentShell.try_run(
                ["rpm", "-q", "kernel-%s*" % required_kernel_prefix]).split('\n')[0]
        except (AgentShell.CommandExecutionError, StopIteration):
            required_kernel = None
    else:
        required_kernel = None

    available_kernels = []
    for installed_kernel in AgentShell.try_run(["rpm", "-q", "kernel"]).split("\n"):
        if installed_kernel:
            available_kernels.append(installed_kernel)

    return {
        'running': running_kernel,
        'required': required_kernel,
        'available': available_kernels
    }
def unmount_target(uuid):
    # This is called by the Target RA from corosync
    # only unmount targets that are controlled by chroma:Target
    try:
        result = AgentShell.run(
            ['cibadmin', '--query', '--xpath', '//primitive'])
    except OSError, err:
        if err.errno != errno.ENOENT:
            raise
def get_resource_locations():
    """Parse `crm_mon -1` to identify where (if anywhere)
    resources (i.e. targets) are running

    returns [ resource_id: location|None, ... ]
    """
    try:
        result = AgentShell.run(["crm_mon", "-1", "-r", "-X"])
    except OSError, err:
        # ENOENT is fine here. Pacemaker might not be installed yet.
        if err.errno != errno.ENOENT:
            raise
def selinux_status():
    """
    Get selinux status on node

    :return: {'status': 'Disabled'}
    """
    status = "Disabled"
    rc = AgentShell.run(["getenforce"])
    if rc.rc == 0:
        status = rc.stdout.strip()

    return {"status": status}
def latest_kernel(kernel_list, modlist):
    required_kernel = None
    arch = AgentShell.try_run(["uname", "-m"]).strip()
    for kernel in kernel_list:
        if not kver_gt(kernel, required_kernel, arch):
            continue
        kver = kernel.split("-", 1)[1]
        if AgentShell.run(["modinfo", "-n", "-k", kver] + modlist).rc == 0:
            required_kernel = kernel

    return required_kernel
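# Hedged usage sketch (not part of the original source): picks the newest
# installed kernel that provides a given set of modules. The module name
# "lustre" is an illustrative assumption; kernel_status() above builds the real
# modlist from the .ko files shipped by the kmod-lustre / lustre-osd packages.
def _example_required_kernel():
    installed = [k for k in AgentShell.try_run(["rpm", "-q", "kernel"]).split("\n") if k]
    # modinfo -n -k <ver> <module> succeeds only if the module exists for that kernel
    return latest_kernel(installed, ["lustre"])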
def scanner_cmd(cmd):
    # Because we are pulling from device-scanner,
    # It is very important that we wait for
    # the udev queue to settle before requesting new data
    AgentShell.run(["udevadm", "settle"])

    client = socket.socket(socket.AF_UNIX)
    client.settimeout(10)
    client.connect_ex("/var/run/device-scanner.sock")
    client.sendall(json.dumps(cmd) + "\n")

    out = ""

    while True:
        out += client.recv(1024)

        if out.endswith("\n"):
            try:
                return json.loads(out)
            except ValueError:
                pass
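# Hedged usage sketch (not part of the original source): "info" is the only
# command these snippets send to device-scanner (see fetch_device_list() below);
# any other command name would be an assumption about that daemon's protocol.
# scanner_cmd() waits for udev to settle, then returns the parsed JSON reply.
def _example_device_info():
    info = scanner_cmd("info")
    return list(info.itervalues())  # py2 codebase: the values are device records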
def _configure_target_priority(primary, ha_label, node):
    if primary:
        score = "20"
    else:
        score = "10"

    name = _constraint(ha_label, primary)
    result = AgentShell.run(
        ['pcs', 'constraint', 'location', 'add', name, ha_label, node, score])

    if result.rc == 76:
        console_log.warn("A constraint with the name %s already exists", name)
        result.rc = 0

    return result
def get_resource_locations():
    """Parse `crm_mon -1` to identify where (if anywhere)
    resources (i.e. targets) are running

    returns [ resource_id: location|None, ... ]
    """
    try:
        result = AgentShell.run(["crm_mon", "-1", "-r", "-X"])
    except OSError as err:
        # ENOENT is fine here. Pacemaker might not be installed yet.
        if err.errno != errno.ENOENT:
            raise err
        return {}

    if result.rc != 0:
        console_log.info("crm_mon failed (%d): '%s' '%s'", result.rc,
                         result.stdout, result.stderr)
        return {}

    return _get_resource_locations(result.stdout)
def _nodes_in_cluster():
    """
    Returns the nodes in the corosync cluster

    example output from command 'pcs status corosync':
    > Corosync Nodes:
    >  Online:
    >  Offline: bill.bailey.com bob.marley.com

    :return: a list of all nodes in cluster
    """
    nodes = []
    result = AgentShell.run(["pcs", "status", "nodes", "corosync"])

    if result.rc != 0:
        # log all command errors but always continue to remove node from cluster
        console_log.warning(result.stderr)
    else:
        # nodes are on the right side of lines separated with ':'
        for line in result.stdout.split("\n"):
            if line.find(":") > 0:
                nodes.extend(line.split(":")[1].strip().split())

    return nodes
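# Hedged illustration (not part of the original source): with the sample output
# quoted in the docstring above, _nodes_in_cluster() would return
# ["bill.bailey.com", "bob.marley.com"] -- every whitespace-separated token to
# the right of the first ':' on each line; header lines such as "Corosync Nodes:"
# and the empty "Online:" line contribute nothing.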
def fetch_device_list():
    AgentShell.run(["udevadm", "settle"])

    info = scanner_cmd("info")

    return pipe(info.itervalues(), cmap(as_device), cfilter(filter_device), list)
def yum_util(action, packages=[], fromrepo=None, enablerepo=None, narrow_updates=False):
    """
    A wrapper to perform yum actions in an encapsulated way.

    :param action: clean, install, remove, update, requires etc
    :param packages: Packages to install or remove
    :param fromrepo: The repo the action should be carried out from, others are disabled.
    :param enablerepo: The repo to enable for the action, others are not disabled or enabled
    :param narrow_updates: ?
    :return: stdout of the command on success; raises CommandExecutionError on error.
    """
    if fromrepo and enablerepo:
        raise ValueError(
            "Cannot provide fromrepo and enablerepo simultaneously")

    repo_arg = []
    valid_rc_values = [0]  # Some error values other than 0 are valid.
    tries = 2
    if fromrepo:
        repo_arg = ["--disablerepo=*"] + ["--enablerepo=%s" % r for r in fromrepo]
    elif enablerepo:
        repo_arg = ["--enablerepo=%s" % r for r in enablerepo]
    if narrow_updates and action == "query":
        repo_arg.extend(["--upgrades"])

    if action == "clean":
        cmd = ["yum", "clean", "all"] + (repo_arg if repo_arg else ["--enablerepo=*"])
    elif action == "install":
        cmd = (["yum", "install", "-y", "--exclude", "kernel-debug"] + repo_arg +
               list(packages))
    elif action == "remove":
        cmd = ["yum", "remove", "-y"] + repo_arg + list(packages)
    elif action == "update":
        cmd = (["yum", "update", "-y", "--exclude", "kernel-debug"] + repo_arg +
               list(packages))
    elif action == "requires":
        cmd = ["repoquery", "--requires"] + repo_arg + list(packages)
    elif action == "query":
        cmd = ["repoquery"] + repo_arg + list(packages)
    elif action == "repoquery":
        cmd = (["repoquery", "--show-duplicates"] + repo_arg +
               ["--queryformat=%{EPOCH} %{NAME} %{VERSION} %{RELEASE} %{ARCH}"])
    else:
        raise RuntimeError("Unknown yum util action %s" % action)

    # This is a poor solution for HYD-3855 but not one that carries any known cost.
    # We sometimes see intermittent failures in test, and possibly out of test, that occur
    # 1 in 50 (estimate) times. yum commands are idempotent and so trying the command three
    # times has no downside and changes the estimated chance of fail to 1 in 12500.
    for hyd_3885 in range(tries, -1, -1):
        result = AgentShell.run(cmd)

        if result.rc in valid_rc_values:
            return result.stdout
        else:
            # if we were trying to install, clean the metadata before
            # trying again
            if action == "install":
                AgentShell.run(["yum", "clean", "metadata"])

            daemon_log.info("HYD-3885 Retrying yum command '%s'" % " ".join(cmd))

            if hyd_3885 == 0:
                daemon_log.info("HYD-3885 Retry yum command failed '%s'" % " ".join(cmd))
                raise AgentShell.CommandExecutionError(
                    result, cmd)  # Out of retries so raise for the caller..
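# Hedged usage sketch (not part of the original source): the repo and package
# names are illustrative assumptions. It shows the intended calling convention:
# fromrepo disables every other repository for the transaction, and the wrapper
# retries the yum command before finally raising CommandExecutionError.
def _example_install_lustre():
    return yum_util("install", packages=["lustre"], fromrepo=["lustre-server"])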
for con in dom.getElementsByTagName('rsc_location'):
    ha_label = con.getAttribute("rsc")

    if not locations.get(ha_label):
        locations[ha_label] = {}

    if con.getAttribute("id") == _constraint(ha_label, True):
        ind = 0
    elif con.getAttribute("id") == _constraint(ha_label, False):
        ind = 1
    else:
        console_log.info("Unknown constraint: %s", con.getAttribute("id"))
        continue

    locations[ha_label][ind] = con.getAttribute("node")

active = get_resource_locations()

AgentShell.run(['pcs', 'property', 'set', 'maintenance-mode=true'])

wait_list = []
for res in dom.getElementsByTagName('primitive'):
    if not (res.getAttribute("provider") == "chroma" and
            res.getAttribute("type") == "Target"):
        continue

    ha_label = res.getAttribute("id")

    # _get_target_config() will raise KeyError if uuid doesn't exist locally
    # next() will raise StopIteration if it doesn't find attribute target
    try:
        info = next(
            _get_target_config(ops.getAttribute("value"))
            for ops in res.getElementsByTagName('nvpair')
def _unconfigure_target_priority(primary, ha_label):
    return AgentShell.run([
        'pcs', 'constraint', 'location', 'remove',
        _constraint(ha_label, primary)
    ])
def yum_util(action, packages=[], fromrepo=None, enablerepo=None, narrow_updates=False):
    '''
    A wrapper to perform yum actions in an encapsulated way.
    :param action: clean, install, remove, update, requires etc
    :param packages: Packages to install or remove
    :param fromrepo: The repo the action should be carried out from, others are disabled.
    :param enablerepo: The repo to enable for the action, others are not disabled or enabled
    :param narrow_updates: ?
    :return: stdout of the command on success; raises CommandExecutionError on error.
    '''
    if fromrepo and enablerepo:
        raise ValueError("Cannot provide fromrepo and enablerepo simultaneously")

    repo_arg = []
    valid_rc_values = [0]  # Some error values other than 0 are valid.
    tries = 2
    if fromrepo:
        repo_arg = ['--disablerepo=*'] + ['--enablerepo=%s' % r for r in fromrepo]
    elif enablerepo:
        repo_arg = ['--enablerepo=%s' % r for r in enablerepo]
    if narrow_updates and action == 'query':
        repo_arg.extend(['--upgrades'])

    if action == 'clean':
        cmd = ['dnf', 'clean', 'all'] + (repo_arg if repo_arg else ["--enablerepo=*"])
    elif action == 'install':
        cmd = ['dnf', 'install', '--allowerasing', '-y', '--exclude', 'kernel-debug'] + \
              repo_arg + list(packages)
    elif action == 'remove':
        cmd = ['dnf', 'remove', '-y'] + repo_arg + list(packages)
    elif action == 'update':
        cmd = ['dnf', 'update', '--allowerasing', '-y', '--exclude', 'kernel-debug'] + \
              repo_arg + list(packages)
    elif action == 'requires':
        cmd = ['dnf', 'repoquery', '--requires'] + repo_arg + list(packages)
    elif action == 'query':
        cmd = ['dnf', 'repoquery', '--available'] + repo_arg + list(packages)
    elif action == 'repoquery':
        cmd = ['dnf', 'repoquery', '--available'] + repo_arg + \
              ['--queryformat=%{EPOCH} %{NAME} %{VERSION} %{RELEASE} %{ARCH}']
    elif action == 'check-update':
        cmd = ['dnf', 'repoquery',
               '--queryformat=%{name} %{version}-%{release}.%{arch} %{repoid}',
               '--upgrades'] + repo_arg + list(packages)
    else:
        raise RuntimeError('Unknown yum util action %s' % action)

    # This is a poor solution for HYD-3855 but not one that carries any known cost.
    # We sometimes see intermittent failures in test, and possibly out of test, that occur
    # 1 in 50 (estimate) times. yum commands are idempotent and so trying the command three
    # times has no downside and changes the estimated chance of fail to 1 in 12500.
    for hyd_3885 in range(tries, -1, -1):
        result = AgentShell.run(cmd)

        if result.rc in valid_rc_values:
            return result.stdout
        else:
            # if we were trying to install, clean the metadata before
            # trying again
            if action == 'install':
                AgentShell.run(['dnf', 'clean', 'metadata'])

            daemon_log.info("HYD-3885 Retrying yum command '%s'" % " ".join(cmd))

            if hyd_3885 == 0:
                daemon_log.info("HYD-3885 Retry yum command failed '%s'" % " ".join(cmd))
                raise AgentShell.CommandExecutionError(result, cmd)  # Out of retries so raise for the caller..
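# Hedged usage sketch (not part of the original source): shows the 'check-update'
# action added in this dnf-based variant. Each output line follows the
# queryformat above ("name version-release.arch repoid"), so the first token of
# each non-empty line is the package name; the loop below is an assumption about
# how a caller might consume it.
def _example_pending_updates():
    out = yum_util('check-update')
    return [line.split()[0] for line in out.split('\n') if line]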
def convert_targets(force=False):
    """
    Convert existing ocf:chroma:Target to ZFS + Lustre
    """
    try:
        result = AgentShell.run(["cibadmin", "--query"])
    except OSError as err:
        if err.errno != errno.ENOENT:
            raise err
        return {
            "crm_mon_error": {
                "rc": err.errno,
                "stdout": err.message,
                "stderr": err.strerror,
            }
        }

    if result.rc != 0:
        # Pacemaker not running, or no resources configured yet
        return {
            "crm_mon_error": {
                "rc": result.rc,
                "stdout": result.stdout,
                "stderr": result.stderr,
            }
        }

    dom = ET.fromstring(result.stdout)

    this_node = _this_node()

    # node elements are numbered from 1
    # dc-uuid is the node id of the domain controller
    dcuuid = next(
        (node.get("uname") for node in dom.findall(".//node")
         if node.get("id") == dom.get("dc-uuid")),
        "",
    )

    if dcuuid != this_node and not force:
        console_log.info("This is not Pacemaker DC %s this is %s", dcuuid, this_node)
        return

    # Build map of resource -> [ primary node, secondary node ]
    locations = {}
    for con in dom.findall(".//rsc_location"):
        ha_label = con.get("rsc")

        if not locations.get(ha_label):
            locations[ha_label] = {}

        if con.get("id") == _constraint(ha_label, True):
            ind = 0
        elif con.get("id") == _constraint(ha_label, False):
            ind = 1
        else:
            console_log.info("Unknown constraint: %s", con.get("id"))
            continue

        locations[ha_label][ind] = con.get("node")

    active = get_resource_locations()

    AgentShell.try_run([
        "crm_attribute", "--type", "crm_config", "--name", "maintenance-mode",
        "--update", "true",
    ])

    wait_list = []
    for res in dom.findall(".//primitive"):
        if not (res.get("provider") == "chroma" and res.get("type") == "Target"):
            continue

        ha_label = res.get("id")

        # _get_target_config() will raise KeyError if uuid doesn't exist locally
        # next() will raise StopIteration if it doesn't find attribute target
        try:
            info = next(
                _get_target_config(ops.get("value"))
                for ops in res.findall('.//nvpair[@name="target"]'))
        except Exception as err:
            console_log.error("No local info for resource: %s", ha_label)
            continue

        _unconfigure_target_priority(False, ha_label)
        _unconfigure_target_priority(True, ha_label)
        _unconfigure_target_ha(ha_label, True)

        _configure_target_ha(ha_label, info, (active.get(ha_label) is not None))
        _configure_target_priority(True, ha_label, locations[ha_label][0])
        _configure_target_priority(False, ha_label, locations[ha_label][1])
        wait_list.append([ha_label, (active.get(ha_label) is not None)])

    # wait for last item
    for wait in wait_list:
        console_log.info("Waiting on %s", wait[0])
        _wait_target(*wait)

    AgentShell.try_run([
        "crm_attribute", "--type", "crm_config", "--name", "maintenance-mode",
        "--delete",
    ])
def pacemaker_running():
    result = AgentShell.run(['service', 'pacemaker', 'status'])
    return result.rc == 0
def monitor(self):
    result = AgentShell.run(self.base_cmd + ["-n", self.plug, "-o", "monitor"])
    return result.rc