Ejemplo n.º 1
0
def yum_check_update(repos):
    '''
    Return the packages from the given repos for which yum reports an update.

    Runs the yum 'check-update' command restricted to the repos passed in and parses the
    response line by line.  Any non-empty line that does not look like a package line is
    logged as a warning and otherwise ignored.

    Will raise a CommandExecutionError if yum throws unexpected errors.

    :param repos: The repos to check for update
    :return: List of packages that require an update.
    '''
    yum_response = yum_util('check-update', fromrepo=repos)

    packages = []

    for line in yum_response.split('\n'):
        if not line:
            # Empty lines carry no information and are not worth a log entry.
            continue

        fields = line.split()

        # A usable line has exactly three whitespace-separated fields, the third of which is
        # one of the requested repos; the first field is what gets reported.
        if len(fields) != 3 or fields[2] not in repos:
            daemon_log.warning(
                "yum check_update found unknown response of: %s\nIn: %s\nLooking at: repos %s"
                % (line, yum_response, repos))
            continue

        packages.append(fields[0])

    return packages
Ejemplo n.º 2
0
    def _add_zfs_pool(self, line, block_devices):
        """
        Register the zpool described by ``line``.

        The pool's datasets and zvols are looked up, the combined record is written to the
        store under the pool's uuid, and the result is handed to
        ``_update_pool_or_datasets``.  If the major:minor numbers of the pool's devices cannot
        be resolved, a warning is logged and nothing is recorded.

        :param line: Three whitespace-separated fields: pool name, human-readable size, uuid.
        :param block_devices: Object used to translate device paths to major:minor numbers.
        """
        pool_name, human_size, pool_uuid = line.split()

        size_in_bytes = util.human_to_bytes(human_size)

        major_minors = block_devices.paths_to_major_minors(
            _get_all_zpool_devices(pool_name))

        if major_minors is None:
            daemon_log.warning("Could not find major minors for zpool '%s'" %
                               pool_name)
            return

        datasets = _get_zpool_datasets(pool_name, major_minors)
        zvols = _get_zpool_zvols(pool_name, major_minors, block_devices)

        pool_md = dict(
            name=pool_name,
            path=pool_name,
            # A pool has no real major:minor, so a synthetic identifier is used instead.
            block_device="zfspool:%s" % pool_name,
            uuid=pool_uuid,
            size=size_in_bytes,
            drives=major_minors,
        )

        # Persist the pool together with its datasets and zvols, keyed by the pool uuid.
        store_record = dict(pool=pool_md, datasets=datasets, zvols=zvols)
        write_to_store(pool_uuid, store_record)

        self._update_pool_or_datasets(block_devices, pool_md, datasets, zvols)
    def _load(cls):
        """
        Discover the plugin modules in ``cls.path`` and register what they export.

        Every ``*.py*`` file in ``cls.path`` whose module name is not in EXCLUDED_PLUGINS is
        treated as a candidate.  Candidates whose module name starts with an underscore are
        not imported.  For each imported module:

        * every function in its ``ACTIONS`` list is stored in ``cls.commands`` under the
          function's name (a warning is logged if the module defines no ``ACTIONS``);
        * every entry of a non-empty ``CAPABILITIES`` is merged into ``cls.capabilities``.

        A module that fails to load is logged with its traceback and skipped, so one broken
        plugin does not prevent the others from loading.
        """
        def _walk_parents(directory):
            """Walk backwards up the tree to first non-module directory."""
            components = []

            if (os.path.isfile("%s/__init__.pyc" % directory)
                    or os.path.isfile("%s/__init__.py" % directory)):
                parent, child = os.path.split(directory)
                components.append(child)
                components.extend(_walk_parents(parent))

            return components

        def _build_namespace(directory):
            """Builds a namespace by finding all parent modules."""
            return ".".join(reversed(_walk_parents(directory)))

        names = set()

        assert os.path.isdir(cls.path)
        for modfile in sorted(glob.glob("%s/*.py*" % cls.path)):
            directory, filename = os.path.split(modfile)
            # "foo.py" and "foo.pyc" both reduce to "foo"; the set removes the duplicate.
            module_name = filename.split(".py")[0]
            if module_name not in EXCLUDED_PLUGINS:
                names.add("%s.%s" % (_build_namespace(directory), module_name))

        daemon_log.info("Found action plugin modules: %s" % names)

        cls.commands = {}
        capabilities = set()
        for name in names:
            # Modules whose own name starts with '_' are never imported as plugins.
            if name.split(".")[-1].startswith('_'):
                continue

            try:
                module = __import__(name, None, None,
                                    ['ACTIONS', 'CAPABILITIES'])
                if hasattr(module, 'ACTIONS'):
                    for fn in module.ACTIONS:
                        cls.commands[fn.__name__] = fn

                    daemon_log.info(
                        "Loaded actions from %s: %s" %
                        (name, [fn.__name__ for fn in module.ACTIONS]))
                else:
                    daemon_log.warning(
                        "No 'ACTIONS' defined in action module %s" % name)

                if hasattr(module, 'CAPABILITIES') and module.CAPABILITIES:
                    # set.add() accepts a single element, so unpacking a list with several
                    # capabilities into it raised TypeError; update() merges them all.
                    capabilities.update(module.CAPABILITIES)

            except Exception:
                daemon_log.warning("** error loading plugin %s" % name)
                daemon_log.warning(traceback.format_exc())

        cls.capabilities = list(capabilities)
Ejemplo n.º 4
0
    def _search_for_inactive(self):
        """
        Return list of importable zpool names by parsing the 'zpool import' command output

        Example of the output being parsed:

        # [root@lotus-33vm17 ~]# zpool import
        #    pool: lustre
        #      id: 5856902799170956568
        #   state: ONLINE
        #  action: The pool can be imported using its name or numeric identifier.
        #  config:
        #
        #       lustre                             ONLINE
        #         scsi-0QEMU_QEMU_HARDDISK_disk15  ONLINE
        #         scsi-0QEMU_QEMU_HARDDISK_disk14  ONLINE
        #
        #  ... (repeats for all discovered zpools)

        A pool is included only if its state is one of ``self.acceptable_health``; pools in
        any other state are logged and skipped.

        :return: List of zpool names.
        :raises AgentShell.CommandExecutionError: if 'zpool import' fails with a return code
            other than 1.
        """
        try:
            out = AgentShell.try_run(["zpool", "import"])
        except AgentShell.CommandExecutionError as e:
            # zpool import errors with error code 1 if nothing available to import
            if e.result.rc != 1:
                # Bare raise keeps the original traceback intact.
                raise
            out = ""

        zpool_names = []
        zpool_name = None

        for line in out.split("\n"):
            if not line:
                continue

            pool_match = re.match(r"(\s*)pool: (\S*)", line)
            if pool_match is not None:
                zpool_name = pool_match.group(2)

            state_match = re.match(r"(\s*)state: (\S*)", line)
            if state_match is None:
                continue

            state = state_match.group(2)

            if not zpool_name:
                daemon_log.warning(
                    "Found a zpool import state but had no zpool name")
            elif state in self.acceptable_health:
                zpool_names.append(zpool_name)
            else:
                daemon_log.warning(
                    "Not scanning zpool %s because it is %s." %
                    (zpool_name, state))

            # After each 'state' line is encountered, move onto the next zpool name
            zpool_name = None

        return zpool_names
Ejemplo n.º 5
0
    def _read_crm_mon_as_xml(self):
        """Run crm_mon --one-shot --as-xml, return raw output or None

        Return codes 0 and 10 are expected, and for those the command's stdout is returned.
        Any other return code is logged as a warning and None is returned instead.
        """
        crm_command = ['crm_mon', '--one-shot', '--as-xml']
        exit_code, out, err = AgentShell.run_old(crm_command)

        # 10 Corosync is not running on this node
        if exit_code in (0, 10):
            return out

        daemon_log.warning("rc=%s running '%s': '%s' '%s'" %
                           (exit_code, crm_command, out, err))
        return None
Ejemplo n.º 6
0
    def _parse_crm_as_xml(self, raw):
        """ Turn the raw crm_mon output into a status dict

        The returned dict has three keys: "datetime" (the report's last-update time converted
        to UTC and formatted as an ISO-style string), "nodes" (node name -> that node's XML
        attributes) and "options" (currently only "stonith_enabled").

        returns None when ``raw`` is not parseable XML; that is logged as a warning unless
        ``raw`` contains the known "connection failed" message.
        """
        try:
            root = xml.fromstring(raw)
        except ParseError:
            # not xml, might be a known error message
            if CorosyncPlugin.COROSYNC_CONNECTION_FAILURE not in raw:
                daemon_log.warning("Bad xml from corosync crm_mon:  %s" % raw)
            return None

        # Timestamp of the report, converted to UTC
        last_update = root.find("summary/last_update").get("time")
        local_time = IMLDateTime.strptime(last_update, "%a %b %d %H:%M:%S %Y")
        utc_stamp = IMLDateTime.convert_datetime_to_utc(local_time).strftime(
            "%Y-%m-%dT%H:%M:%S+00:00")

        # One entry per node, carrying every attribute crm_mon reported for it
        nodes = {
            node.get("name"): node.attrib
            for node in root.findall("nodes/node")
        }

        # stonith is reported as disabled unless the cluster options say otherwise
        stonith_enabled = False
        cluster_options = root.find("summary/cluster_options")
        if cluster_options is not None:
            stonith_enabled = cluster_options.get("stonith-enabled") == "true"

        return {
            "datetime": utc_stamp,
            "nodes": nodes,
            "options": {"stonith_enabled": stonith_enabled},
        }
Ejemplo n.º 7
0
 def terminate(self, plugin_name):
     """
     Tear down the session registered for ``plugin_name`` and remove it from the table.

     If no session is found, a warning is logged and nothing else happens.  If the entry
     has already disappeared by the time it is deleted, that is logged as a warning too.
     """
     try:
         session = self.get(plugin_name)
     except KeyError:
         daemon_log.warning("SessionTable.terminate not found %s" %
                            plugin_name)
         return

     daemon_log.info("SessionTable.terminate %s/%s" %
                     (plugin_name, session.id))
     session.teardown()

     # The entry may have been removed while the session was being torn down.
     try:
         del self._sessions[plugin_name]
     except KeyError:
         daemon_log.warning(
             "SessionTable.terminate session object already gone")
Ejemplo n.º 8
0
    def _handle_messages(self, messages):
        """
        Parse each received message and act on it according to its type.

        Session control messages create or terminate sessions; DATA messages are passed to
        the session they are addressed to.  An exception raised while handling one message is
        logged and does not stop the remaining messages from being processed.

        :param messages: Sequence of raw messages, each accepted by ``Message.parse``.
        """
        daemon_log.info("HttpReader: got %s messages" % (len(messages)))

        for raw_message in messages:
            m = Message()
            m.parse(raw_message)
            daemon_log.info("HttpReader: %s(%s, %s)" %
                            (m.type, m.plugin_name, m.session_id))

            try:
                if m.type == "SESSION_CREATE_RESPONSE":
                    self._client.sessions.create(m.plugin_name, m.session_id)
                elif m.type == "SESSION_TERMINATE_ALL":
                    self._client.sessions.terminate_all()
                elif m.type == "SESSION_TERMINATE":
                    self._client.sessions.terminate(m.plugin_name)
                elif m.type == "DATA":
                    try:
                        session = self._client.sessions.get(
                            m.plugin_name, m.session_id)
                    except KeyError:
                        daemon_log.warning(
                            "Received a message for unknown session %s/%s" %
                            (m.plugin_name, m.session_id))
                        continue

                    # The message has been routed to the plugin instance for this session;
                    # a failure inside the plugin costs it its session.
                    # NOTE(review): the bare except also traps SystemExit and
                    # KeyboardInterrupt; kept as-is to preserve existing behavior.
                    try:
                        session.receive_message(m.body)
                    except:
                        daemon_log.error("%s/%s raised an exception: %s" %
                                         (m.plugin_name, m.session_id,
                                          traceback.format_exc()))
                        self._client.sessions.terminate(m.plugin_name)
                else:
                    raise NotImplementedError(m.type)
            except Exception:
                exc_lines = traceback.format_exception(*(sys.exc_info()))
                backtrace = "\n".join(exc_lines)
                daemon_log.error("Plugin exception handling data message: %s" %
                                 backtrace)
Ejemplo n.º 9
0
    def _run(self):
        """
        Repeatedly GET messages and dispatch them until ``self._stopping`` is set.

        Each successful response's "messages" are passed to ``_handle_messages``.  When a
        request fails with HttpError, all sessions are terminated and the loop waits
        ``HTTP_RETRY_PERIOD`` (or until asked to stop) before trying again.
        """
        boot_time = self._client.boot_time.isoformat() + "Z"
        start_time = self._client.start_time.isoformat() + "Z"
        get_args = dict(server_boot_time=boot_time,
                        client_start_time=start_time)

        while True:
            if self._stopping.is_set():
                break

            daemon_log.info("HttpReader: get")
            try:
                body = self._client.get(params=get_args)
            except HttpError:
                daemon_log.warning("HttpReader: request failed")
                # TX messages may have been dropped when this happens, possibly including
                # session control messages, so everything has to be reset.
                # NB this could be narrowed to only terminate_all when an HTTP request was
                # actually started: no teardown is needed if there was never a TCP
                # connection to the manager.
                self._client.sessions.terminate_all()

                # Waiting on the event lets a stop request cut the retry delay short.
                self._stopping.wait(timeout=self.HTTP_RETRY_PERIOD)
                continue

            self._handle_messages(body["messages"])

        daemon_log.info("HttpReader: stopping")
Ejemplo n.º 10
0
    def _lnet_devices(self, interfaces):
        """
        Build a description of the LNet NIDs on the current node from 'lctl get_param -n nis'.

        If the command fails, a warning is logged and the previously cached results are
        returned instead.

        :param interfaces: A list of the interfaces on the current node
        :return: Returns a dict of dicts describing the nids on the current node.
        """
        try:
            output = AgentShell.try_run(["lctl", "get_param", "-n", "nis"])
            lines = output.split("\n")
        except Exception as err:
            daemon_log.warning("get_nids: failed to open: {}".format(
                err.message))
            return LinuxNetworkDevicePlugin.cached_results

        # Turn every non-empty line after the header line into an LNetNid
        lnet_nids = []
        for nid_line in lines[1:]:
            if nid_line:
                try:
                    lnet_nids.append(LNetNid(nid_line, interfaces))
                except NetworkInterfaces.InterfaceNotFound as e:
                    daemon_log.warning(e)

        result = {}

        for nid in lnet_nids:
            if nid.lnd_type not in EXCLUDE_INTERFACES:
                result[nid.name] = dict(
                    nid_address=nid.nid_address,
                    lnd_type=nid.lnd_type,
                    lnd_network=nid.lnd_network,
                )

            # NOTE(review): the cache is refreshed once per NID, and not at all when no NIDs
            # were parsed - confirm whether a single refresh after the loop was intended.
            LinuxNetworkDevicePlugin.cache_results(raw_result=result)

        return result
Ejemplo n.º 11
0
    def _read_crm_mon_as_xml(self):
        """Run crm_mon --one-shot --as-xml, return raw output or None

        Return codes 0 and 10 are expected, and for those the command's stdout is returned.
        Any other return code is logged as a warning and None is returned.  None is also
        returned, without logging, when the command cannot be found (ENOENT); any other
        OSError is re-raised.
        """
        crm_command = ["crm_mon", "--one-shot", "--as-xml"]

        try:
            exit_code, out, err = AgentShell.run_old(crm_command)
        except OSError as e:
            # ENOENT is fine here.  Pacemaker might not be installed yet.
            if e.errno == errno.ENOENT:
                return None
            raise e

        # 10 Corosync is not running on this node
        if exit_code in (0, 10):
            return out

        daemon_log.warning("rc=%s running '%s': '%s' '%s'" %
                           (exit_code, crm_command, out, err))
        return None
Ejemplo n.º 12
0
    def _lnet_devices(self, interfaces):
        '''
        Build a description of the LNet NIDs on the current node from /proc/sys/lnet/nis.

        If the file cannot be opened, a warning is logged and the previously cached results
        are returned instead.

        :param interfaces: A list of the interfaces on the current node
        :return: Returns a dict of dicts describing the nids on the current node.
        '''
        try:
            with open("/proc/sys/lnet/nis") as nis_file:
                lines = nis_file.readlines()
        except IOError:
            daemon_log.warning("get_nids: failed to open")
            return LinuxNetworkDevicePlugin.cached_results

        # Turn every line after the header line into an LNetNid
        lnet_nids = []
        for nid_line in lines[1:]:
            try:
                lnet_nids.append(LNetNid(nid_line, interfaces))
            except NetworkInterfaces.InterfaceNotFound as e:
                daemon_log.warning(e)

        result = {}

        for nid in lnet_nids:
            if nid.lnd_type not in EXCLUDE_INTERFACES:
                result[nid.name] = dict(
                    nid_address=nid.nid_address,
                    lnd_type=nid.lnd_type,
                    lnd_network=nid.lnd_network,
                )

            # NOTE(review): the cache is refreshed once per NID, and not at all when no NIDs
            # were parsed - confirm whether a single refresh after the loop was intended.
            LinuxNetworkDevicePlugin.cache_results(raw_result=result)

        return result
Ejemplo n.º 13
0
    def send(self):
        """Gather queued messages into a single POST and send it.

        Messages are taken from the retry queue first, then from the primary queue, until
        both are empty or the next message would take the POST over MAX_BYTES_PER_POST.  A
        message that does not fit is put on the retry queue for a later call.  A message that
        is oversized on its own is logged and still sent when it is the first in the POST.

        If the POST fails, the sessions of all DATA messages in it are terminated.

        The completion callbacks of the messages included in the POST are invoked once the
        attempt has finished, whether or not it succeeded.  A message pushed back to the
        retry queue does not have its callback invoked until the call that actually includes
        it in a POST.

        Return True if the POST succeeds, else False
        """
        fqdn = self._client._fqdn

        messages = []
        completion_callbacks = []

        post_envelope = {
            "messages": [],
            "server_boot_time": self._client.boot_time.isoformat() + "Z",
            "client_start_time": self._client.start_time.isoformat() + "Z",
        }

        # Size accounting starts from the empty envelope
        messages_bytes = len(json.dumps(post_envelope))
        while True:
            try:
                message = self._retry_messages.get_nowait()
                daemon_log.debug("HttpWriter got message from retry queue")
            except Queue.Empty:
                try:
                    message = self._messages.get_nowait()
                    daemon_log.debug(
                        "HttpWriter got message from primary queue")
                except Queue.Empty:
                    break

            # Serialize once per message and reuse it for sizing and logging
            dumped = message.dump(fqdn)
            message_length = len(json.dumps(dumped))

            if message_length > MAX_BYTES_PER_POST:
                daemon_log.warning("Oversized message %s/%s: %s" % (
                    message_length,
                    MAX_BYTES_PER_POST,
                    dumped,
                ))

            if messages and message_length > MAX_BYTES_PER_POST - messages_bytes:
                # This message will not fit into this POST: pop it back into the queue
                daemon_log.info(
                    "HttpWriter message %s overflowed POST %s/%s (%d "
                    "messages), enqueuing" % (
                        dumped,
                        message_length,
                        MAX_BYTES_PER_POST,
                        len(messages),
                    ))
                self._retry_messages.put(message)
                break

            messages.append(message)
            messages_bytes += message_length

            # Collect the callback only now that the message is part of this POST, so a
            # message deferred to the retry queue is not reported complete prematurely.
            if message.callback:
                completion_callbacks.append(message.callback)

        daemon_log.debug("HttpWriter sending %s messages" % len(messages))
        try:
            post_envelope["messages"] = [m.dump(fqdn) for m in messages]
            self._client.post(post_envelope)
        except HttpError:
            daemon_log.warning("HttpWriter: request failed")
            # Any message we drop will need its session killed
            kill_sessions = {
                m.plugin_name for m in messages if m.type == "DATA"
            }
            for plugin_name in kill_sessions:
                self._client.sessions.terminate(plugin_name)

            return False
        else:
            return True
        finally:
            for callback in completion_callbacks:
                callback()
Ejemplo n.º 14
0
    def __init__(self):
        """
        Fill this mapping with one entry per network interface found on the node.

        Each entry is keyed by interface name and holds a dict with the interface's MAC
        address, IPv4/IPv6 addresses, type, rx/tx byte counters and up/slave state.
        Interfaces named in EXCLUDE_INTERFACES and slave interfaces are left out.

        If reading the interface information raises IOError, a warning is logged and no
        entries are added.
        """
        def interface_to_lnet_type(if_type):
            """
            Translate a Linux interface type into the matching lnd name.

            Network types are reported by lnd name rather than by Linux name so that
            everything stays consistent; the translation is done here at the source and can
            be reversed if the Linux name is ever needed.

            A type with no known translation is returned as-is (lower-cased) rather than
            raising: it may well work, and if it does not it will fail somewhere else.
            """
            linux_type = if_type.lower()
            return self.network_translation.get(linux_type, linux_type)

        # NOTE(review): only IOError is handled here; confirm which exception
        # AgentShell.try_run raises when the 'ip' command itself fails.
        try:
            ip_out = AgentShell.try_run(["ip", "addr"])

            with open("/proc/net/dev") as proc_net_dev:
                dev_stats = proc_net_dev.readlines()
        except IOError:
            daemon_log.warning("ip: failed to run")
            return

        # Parse the ip command output and create a list of lists, where each entry is the
        # output from one device.
        device_lines = []
        devices = [device_lines]
        for line in ip_out.split("\n"):
            if line and line[0] != " ":  # First line of a new device.
                if device_lines:
                    device_lines = []
                    devices.append(device_lines)

            device_lines.append(line)

        # Parse the /proc/net/dev output and create a dictionary of stats (just rx_bytes and
        # tx_bytes today) with an entry for each network port. The input will look something
        # like below.
        # Inter-|   Receive                                                |  Transmit
        # face |bytes    packets errs drop fifo frame compressed multicast|bytes    packets errs drop fifo colls carrier compressed
        #    lo: 8305402   85521    0    0    0     0          0         0  8305402   85521    0    0    0     0       0          0
        #  eth0: 318398818 2069564    0    0    0     0          0         0  6622219   50337    0    0    0     0       0          0
        #  eth1:  408736    7857    0    0    0     0          0         0  4347300   35206    0    0    0     0       0          0
        if not NetworkInterfaces.proc_net_dev_keys:
            # The second header line names the columns; drop the leading "face" label.
            keys = dev_stats[1].replace("|", " ").split()[1:]
            half = len(keys) // 2

            # First half of the columns are receive counters, second half transmit counters.
            # Column numbers are offset by one because column 0 holds the interface name.
            for index, key in enumerate(keys):
                prefix = "rx_" if index < half else "tx_"
                NetworkInterfaces.proc_net_dev_keys[prefix + key] = index + 1

        proc_net_dev_values = {}

        # Data is on the 3rd line to the end, so get the values for each entry.
        for line in dev_stats[2:]:
            # Split on the colon rather than on whitespace: the interface name can be
            # followed directly by the first counter ("eth0:318398818 ..."), which would
            # otherwise corrupt the name and shift every column.
            name, _, counters = line.partition(":")
            values = [name.strip()] + counters.split()
            proc_net_dev_values[values[0]] = NetworkInterface.RXTXStats(
                values[NetworkInterfaces.proc_net_dev_keys["rx_bytes"]],
                values[NetworkInterfaces.proc_net_dev_keys["tx_bytes"]],
            )

        # Now create a network interface for each of the entries.
        for device_lines in devices:
            interface = NetworkInterface(device_lines, proc_net_dev_values)

            if (interface.interface
                    not in EXCLUDE_INTERFACES) and (interface.slave is False):
                self[interface.interface] = {
                    "mac_address": interface.mac_address,
                    "inet4_address": interface.inet4_addr,
                    "inet4_prefix": interface.inet4_prefix,
                    "inet6_address": interface.inet6_addr,
                    "type": interface_to_lnet_type(interface.type),
                    "rx_bytes": interface.rx_tx_stats.rx_bytes,
                    "tx_bytes": interface.rx_tx_stats.tx_bytes,
                    "up": interface.up,
                    "slave": interface.slave,
                }
Ejemplo n.º 15
0
class CorosyncPlugin(DevicePlugin):
    """ Agent Plugin to read corosync node health status information

        This plugin will run on all nodes and report about the health of
        all nodes in its peer group.

        See also the chroma_core/services/corosync

        Node status is reported as a dictionary of host names containing
        all of the possible crm_mon data as attributes:
        { 'node1': {name: attr, name: attr...}
          'node2': {name: attr, name: attr...} }

        datetime is passed in localtime converted to UTC.

        Based on xml output from this version of corosync/pacemaker
        crm --version
        1.1.7-6.el6 (Build 148fccfd5985c5590cc601123c6c16e966b85d14)
    """

    # This is the message that crm_mon will report
    # when corosync is not running
    COROSYNC_CONNECTION_FAILURE = ("Connection to cluster failed: "
                                   "connection failed")

    def _parse_crm_as_xml(self, raw):
        """ Parse the crm_mon response

        The returned dict has three keys: 'datetime' (the report's last-update time
        converted to UTC), 'nodes' (node name -> that node's XML attributes) and 'options'
        (currently only 'stonith_enabled').

        returns dict of nodes status or None if ``raw`` is not parseable XML; that case is
        logged as a warning unless ``raw`` contains COROSYNC_CONNECTION_FAILURE.
        """

        return_dict = None

        try:
            root = xml.fromstring(raw)
        except ParseError:
            # not xml, might be a known error message
            if CorosyncPlugin.COROSYNC_CONNECTION_FAILURE not in raw:
                daemon_log.warning("Bad xml from corosync crm_mon:  %s" % raw)
        else:
            return_dict = {}

            #  Got node info, pack it up and return
            tm_str = root.find('summary/last_update').get('time')
            tm_datetime = IMLDateTime.strptime(tm_str, '%a %b %d %H:%M:%S %Y')
            return_dict['datetime'] = IMLDateTime.convert_datetime_to_utc(
                tm_datetime).strftime("%Y-%m-%dT%H:%M:%S+00:00")

            nodes = {}
            for node in root.findall("nodes/node"):
                nodes[node.get("name")] = node.attrib

            return_dict['nodes'] = nodes

            # stonith is reported as disabled unless the cluster options say otherwise
            return_dict['options'] = {
                'stonith_enabled': False
            }

            cluster_options = root.find('summary/cluster_options')

            if cluster_options is not None:
                return_dict['options']['stonith_enabled'] = (
                    cluster_options.get('stonith-enabled') == 'true')

        return return_dict

    def _read_crm_mon_as_xml(self):
        """Run crm_mon --one-shot --as-xml, return raw output or None

        For expected return values (0, 10), return the stdout from output.
        If the return value is unexpected, log a warning, and return None.
        None is also returned when the command cannot be found (ENOENT); any other OSError
        is re-raised.
        """

        crm_command = ['crm_mon', '--one-shot', '--as-xml']
        try:
            rc, stdout, stderr = AgentShell.run_old(crm_command)
        except OSError as e:
            # ENOENT is fine here.  Pacemaker might not be installed yet.
            if e.errno != errno.ENOENT:
                raise
            # Nothing ran, so there is no rc or output to inspect below.
            return None

        if rc not in [0, 10]:  # 10 Corosync is not running on this node
            daemon_log.warning("rc=%s running '%s': '%s' '%s'" %
                               (rc, crm_command, stdout, stderr))
            stdout = None

        return stdout