Example 1
class Temperature(MonitorPlugin):
    """Capture thermal zone temperatures and trip point settings."""

    persist_name = "temperature"
    scope = "temperature"
    # Prevent the Plugin base-class from scheduling looping calls.
    run_interval = None

    def __init__(self,
                 interval=30,
                 monitor_interval=60 * 60,
                 thermal_zone_path=None,
                 create_time=time.time):
        self.thermal_zone_path = thermal_zone_path
        self._interval = interval
        self._monitor_interval = monitor_interval
        self._create_time = create_time
        self._thermal_zones = []
        self._temperatures = {}

        for thermal_zone in get_thermal_zones(self.thermal_zone_path):
            self._thermal_zones.append(thermal_zone.name)
            self._temperatures[thermal_zone.name] = []

    def register(self, registry):
        super(Temperature, self).register(registry)
        if self._thermal_zones:
            self._accumulate = Accumulator(self._persist,
                                           self.registry.step_size)

            registry.reactor.call_every(self._interval, self.run)

            self._monitor = CoverageMonitor(self._interval,
                                            0.8,
                                            "temperature snapshot",
                                            create_time=self._create_time)
            registry.reactor.call_every(self._monitor_interval,
                                        self._monitor.log)
            registry.reactor.call_on("stop", self._monitor.log, priority=2000)
            self.call_on_accepted("temperature", self.exchange, True)

    def create_messages(self):
        messages = []
        for zone in self._thermal_zones:
            temperatures = self._temperatures[zone]
            self._temperatures[zone] = []
            if not temperatures:
                continue
            messages.append({
                "type": "temperature",
                "thermal-zone": zone,
                "temperatures": temperatures
            })
        return messages

    def send_messages(self, urgent):
        for message in self.create_messages():
            self.registry.broker.send_message(message,
                                              self._session_id,
                                              urgent=urgent)

    def exchange(self, urgent=False):
        self.registry.broker.call_if_accepted("temperature",
                                              self.send_messages, urgent)

    def run(self):
        self._monitor.ping()
        now = int(self._create_time())
        for zone in get_thermal_zones(self.thermal_zone_path):
            if zone.temperature_value is not None:
                key = ("accumulate", zone.name)
                step_data = self._accumulate(now, zone.temperature_value, key)
                if step_data:
                    self._temperatures[zone.name].append(step_data)
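
The pattern above is shared by all the plugins in this collection: run() samples a value every _interval seconds, feeds it to an Accumulator keyed by zone, and only when a full step (registry.step_size seconds) has been covered does the accumulator return a (timestamp, value) pair that is queued for the next exchange. Below is a minimal stand-in illustrating that contract; it is an assumption about the behaviour, not landscape's actual Accumulator, which may average differently across step boundaries.

# Illustrative stand-in for the step-based accumulation used by run() above.
# Assumption: one averaged data point is emitted per completed step bucket.
class StepAccumulator(object):

    def __init__(self, persist, step_size):
        self._persist = persist      # dict-like store, keyed by accumulate key
        self._step_size = step_size  # bucket length, in seconds

    def __call__(self, now, value, key):
        step = now // self._step_size
        prev_step, total, count = self._persist.get(key, (step, 0.0, 0))
        if step != prev_step:
            # A full bucket has elapsed: emit (bucket timestamp, average).
            point = (prev_step * self._step_size, total / max(count, 1))
            self._persist[key] = (step, float(value), 1)
            return point
        self._persist[key] = (step, total + value, count + 1)
        return None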
Example 2
class MountInfo(MonitorPlugin):
    """Capture information about mounted filesystems and their free space."""

    persist_name = "mount-info"
    scope = "disk"

    max_free_space_items_to_exchange = 200

    def __init__(self,
                 interval=300,
                 monitor_interval=60 * 60,
                 mounts_file="/proc/mounts",
                 create_time=time.time,
                 statvfs=None,
                 mtab_file="/etc/mtab"):
        self.run_interval = interval
        self._monitor_interval = monitor_interval
        self._create_time = create_time
        self._mounts_file = mounts_file
        self._mtab_file = mtab_file
        if statvfs is None:
            statvfs = os.statvfs
        self._statvfs = statvfs
        self._free_space = []
        self._mount_info = []
        self._mount_info_to_persist = None
        self.is_device_removable = is_device_removable

    def register(self, registry):
        super(MountInfo, self).register(registry)
        self._accumulate = Accumulator(self._persist, self.registry.step_size)
        self._monitor = CoverageMonitor(self.run_interval,
                                        0.8,
                                        "mount info snapshot",
                                        create_time=self._create_time)
        self.registry.reactor.call_every(self._monitor_interval,
                                         self._monitor.log)
        self.registry.reactor.call_on("stop", self._monitor.log, priority=2000)
        self.call_on_accepted("mount-info", self.send_messages, True)

    def create_messages(self):
        return [
            message for message in [
                self.create_mount_info_message(),
                self.create_free_space_message()
            ] if message is not None
        ]

    def create_mount_info_message(self):
        if self._mount_info:
            message = {"type": "mount-info", "mount-info": self._mount_info}
            self._mount_info_to_persist = self._mount_info[:]
            self._mount_info = []
            return message
        return None

    def create_free_space_message(self):
        if self._free_space:
            limit = self.max_free_space_items_to_exchange
            items_to_exchange = self._free_space[:limit]
            message = {"type": "free-space", "free-space": items_to_exchange}
            self._free_space = self._free_space[limit:]
            return message
        return None

    def send_messages(self, urgent=False):
        for message in self.create_messages():
            d = self.registry.broker.send_message(message,
                                                  self._session_id,
                                                  urgent=urgent)
            if message["type"] == "mount-info":
                d.addCallback(lambda x: self.persist_mount_info())

    def exchange(self):
        self.registry.broker.call_if_accepted("mount-info", self.send_messages)

    def persist_mount_info(self):
        for timestamp, mount_info in self._mount_info_to_persist:
            mount_point = mount_info["mount-point"]
            self._persist.set(("mount-info", mount_point), mount_info)
        self._mount_info_to_persist = None
        # Force the registry to write the persistent store to disk, so that
        # the persisted data reflects the state of the messages just sent.
        self.registry.flush()

    def run(self):
        self._monitor.ping()
        now = int(self._create_time())
        current_mount_points = set()
        for mount_info in self._get_mount_info():
            mount_point = mount_info["mount-point"]
            free_space = mount_info.pop("free-space")

            key = ("accumulate-free-space", mount_point)
            step_data = self._accumulate(now, free_space, key)
            if step_data:
                timestamp = step_data[0]
                free_space = int(step_data[1])
                self._free_space.append((timestamp, mount_point, free_space))

            prev_mount_info = self._persist.get(("mount-info", mount_point))
            if not prev_mount_info or prev_mount_info != mount_info:
                if mount_info not in [m for t, m in self._mount_info]:
                    self._mount_info.append((now, mount_info))

            current_mount_points.add(mount_point)

    def _get_mount_info(self):
        """Generator yields local mount points worth recording data for."""
        bound_mount_points = self._get_bound_mount_points()

        for info in get_mount_info(self._mounts_file, self._statvfs):
            device = info["device"]
            mount_point = info["mount-point"]
            if (device.startswith("/dev/")
                    and not mount_point.startswith("/dev/")
                    and not self.is_device_removable(device)
                    and mount_point not in bound_mount_points):

                yield info

    def _get_bound_mount_points(self):
        """
        Returns a set of mount points that have the "bind" option
        by parsing /etc/mtab.
        """
        bound_points = set()
        if not self._mtab_file or not os.path.isfile(self._mtab_file):
            return bound_points

        with open(self._mtab_file, "r") as mtab:
            for line in mtab:
                try:
                    device, mount_point, filesystem, options = line.split()[:4]
                    mount_point = codecs.decode(mount_point, "unicode_escape")
                except ValueError:
                    continue
                if "bind" in options.split(","):
                    bound_points.add(mount_point)
        return bound_points
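
As a concrete illustration of the bind-mount filtering above, consider a hypothetical /etc/mtab where the same device is mounted twice, once with the "bind" option; _get_bound_mount_points() collects the bound mount point, and _get_mount_info() then skips it so free space is not reported twice.

# Hypothetical mtab content; the second entry is a bind mount.
mtab_lines = [
    "/dev/sda1 /srv/data ext4 rw,relatime 0 0",
    "/dev/sda1 /srv/backup ext4 rw,bind 0 0",
]
for line in mtab_lines:
    device, mount_point, filesystem, options = line.split()[:4]
    if "bind" in options.split(","):
        print(mount_point)  # -> /srv/backup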
class CoverageMonitorTest(ReactorHavingTest):
    def setUp(self):
        super(CoverageMonitorTest, self).setUp()
        self.monitor = CoverageMonitor(1,
                                       1.0,
                                       "test",
                                       create_time=self.reactor.time)

    def test_warn(self):
        self.monitor.ping()
        self.reactor.advance(1)
        self.assertFalse(self.monitor.warn())

        self.reactor.advance(1)
        self.assertTrue(self.monitor.warn())

        self.monitor.reset()
        self.assertFalse(self.monitor.warn())

    def test_percent_no_data(self):
        """
        If no time has passed and the monitor hasn't received any
        pings it should return 100%.
        """
        self.assertEqual(self.monitor.percent, 1.0)

    def test_percent_no_expected_data(self):
        """
        If time < interval has passed and the monitor has received some pings,
        it should still return 100%.
        """
        monitor = CoverageMonitor(10,
                                  1.0,
                                  "test",
                                  create_time=self.reactor.time)
        monitor.reset()
        self.reactor.advance(1)
        monitor.ping()
        self.assertEqual(monitor.percent, 1.0)

    def test_percent(self):
        self.reactor.advance(1)
        self.assertEqual(self.monitor.percent, 0.0)

        self.monitor.ping()
        self.reactor.advance(1)
        self.assertEqual(self.monitor.percent, 0.5)

    def test_percent_reset(self):
        self.reactor.advance(1)
        self.assertEqual(self.monitor.percent, 0.0)

        self.monitor.reset()
        self.monitor.ping()
        self.reactor.advance(1)
        self.assertEqual(self.monitor.percent, 1.0)

    def test_expected_count(self):
        self.reactor.advance(1)
        self.assertEqual(self.monitor.expected_count, 1.0)

        self.reactor.advance(1)
        self.assertEqual(self.monitor.expected_count, 2.0)

    def test_expected_count_reset(self):
        self.reactor.advance(1)
        self.assertEqual(self.monitor.expected_count, 1.0)

        self.monitor.reset()
        self.reactor.advance(1)
        self.assertEqual(self.monitor.expected_count, 1.0)

    def test_log(self):
        for i in range(100):
            self.monitor.ping()
            self.reactor.advance(1)
        self.monitor.log()
        self.assertTrue(
            "INFO: 100 of 100 expected test events (100.00%) "
            "occurred in the last 100.00s." in self.logfile.getvalue())

    def test_log_warning(self):
        for i in range(100):
            self.reactor.advance(1)
        self.monitor.log()
        self.assertTrue(
            "WARNING: 0 of 100 expected test events (0.00%) "
            "occurred in the last 100.00s." in self.logfile.getvalue())
Example 5
class CephUsage(MonitorPlugin):
    """
    Plugin that captures Ceph usage information. This only works if the client
    runs on one of the Ceph monitor nodes, and is a no-op otherwise.

    The plugin requires the 'python-ceph' package to be installed, which is the
    case on a standard "ceph" charm deployment.
    The landscape-client charm should join a ceph-client relation with the ceph
    charm, which will create a keyring and config file for the landscape-client
    to consume in <data_path>/ceph-client/ceph.landscape-client.conf. It
    contains the following:

    [global]
    auth supported = cephx
    keyring = <keyring-file>
    mon host = <ip>:6789

    The configured keyring can be generated with:

    ceph-authtool <keyring-file> --create-keyring
        --name=client.landscape-client --add-key=<key>
    """

    persist_name = "ceph-usage"
    scope = "storage"
    # Prevent the Plugin base-class from scheduling looping calls.
    run_interval = None

    def __init__(self,
                 interval=30,
                 monitor_interval=60 * 60,
                 create_time=time.time):
        self.active = True
        self._has_rados = has_rados
        self._interval = interval
        self._monitor_interval = monitor_interval
        self._ceph_usage_points = []
        self._ceph_ring_id = None
        self._create_time = create_time
        self._ceph_config = None

    def register(self, registry):
        super(CephUsage, self).register(registry)
        self._ceph_config = os.path.join(self.registry.config.data_path,
                                         "ceph-client",
                                         "ceph.landscape-client.conf")

        self._accumulate = Accumulator(self._persist, self._interval)
        self._monitor = CoverageMonitor(self._interval,
                                        0.8,
                                        "Ceph usage snapshot",
                                        create_time=self._create_time)

        self.registry.reactor.call_every(self._interval, self.run)
        self.registry.reactor.call_every(self._monitor_interval,
                                         self._monitor.log)
        self.registry.reactor.call_on("stop", self._monitor.log, priority=2000)
        self.call_on_accepted("ceph-usage", self.send_message, True)

    def create_message(self):
        ceph_points = self._ceph_usage_points
        ring_id = self._ceph_ring_id
        self._ceph_usage_points = []
        return {
            "type": "ceph-usage",
            "ring-id": ring_id,
            "ceph-usages": [],  # For backwards-compatibility
            "data-points": ceph_points
        }

    def send_message(self, urgent=False):
        message = self.create_message()
        if message["ring-id"] and message["data-points"]:
            self.registry.broker.send_message(message,
                                              self._session_id,
                                              urgent=urgent)

    def exchange(self, urgent=False):
        self.registry.broker.call_if_accepted("ceph-usage", self.send_message,
                                              urgent)

    def run(self):
        if not self._should_run():
            return

        self._monitor.ping()
        deferred = threads.deferToThread(self._perform_rados_call)
        deferred.addCallback(self._handle_usage)
        return deferred

    def _should_run(self):
        """Returns whether or not this plugin should run."""
        if not self.active:
            return False

        if not self._has_rados:
            logging.info("This machine does not appear to be a Ceph machine. "
                         "Deactivating plugin.")
            self.active = False
            return False

        # Check if a ceph config file is available.
        # If it is not, it's not a ceph machine or ceph is not set up yet.
        if self._ceph_config is None or not os.path.exists(self._ceph_config):
            return False

        return True

    def _perform_rados_call(self):
        """The actual Rados interaction."""
        with Rados(conffile=self._ceph_config,
                   rados_id="landscape-client") as cluster:

            cluster_stats = cluster.get_cluster_stats()
            if self._ceph_ring_id is None:
                fsid = unicode(cluster.get_fsid(), "utf-8")
                self._ceph_ring_id = fsid

        return cluster_stats

    def _handle_usage(self, cluster_stats):
        """A method to use as callback to the rados interaction.

        Parses the output and stores the usage data in an accumulator.
        """
        names_map = [("total", "kb"), ("avail", "kb_avail"),
                     ("used", "kb_used")]
        timestamp = int(self._create_time())

        step_values = []
        for name, key in names_map:
            value = cluster_stats[key] * 1024  # Report usage in bytes
            step_value = self._accumulate(timestamp, value, "usage.%s" % name)
            step_values.append(step_value)

        if not all(step_values):
            return

        point = [step_value[0]]  # accumulated timestamp
        point.extend(int(step_value[1]) for step_value in step_values)
        self._ceph_usage_points.append(tuple(point))
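
For reference, a message assembled by create_message() above looks like the following; the ring id and byte values are invented, and each data point is a (timestamp, total, avail, used) tuple in bytes, in names_map order.

# Illustrative "ceph-usage" message; all values are invented.
{
    "type": "ceph-usage",
    "ring-id": "0a1b2c3d-0000-0000-0000-000000000000",
    "ceph-usages": [],  # always empty, kept for backwards-compatibility
    "data-points": [(1449000000, 107374182400, 64424509440, 42949672960)],
}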
Example 6
class CPUUsage(MonitorPlugin):
    """
    Plugin that captures CPU usage information.
    """
    persist_name = "cpu-usage"
    scope = "cpu"
    # Prevent the Plugin base-class from scheduling looping calls.
    run_interval = None

    def __init__(self,
                 interval=30,
                 monitor_interval=60 * 60,
                 create_time=time.time):
        self._interval = interval
        self._monitor_interval = monitor_interval
        self._cpu_usage_points = []
        self._create_time = create_time
        self._stat_file = "/proc/stat"

    def register(self, registry):
        super(CPUUsage, self).register(registry)
        self._accumulate = Accumulator(self._persist, registry.step_size)

        self.registry.reactor.call_every(self._interval, self.run)

        self._monitor = CoverageMonitor(self._interval,
                                        0.8,
                                        "CPU usage snapshot",
                                        create_time=self._create_time)
        self.registry.reactor.call_every(self._monitor_interval,
                                         self._monitor.log)
        self.registry.reactor.call_on("stop", self._monitor.log, priority=2000)
        self.call_on_accepted("cpu-usage", self.send_message, True)

    def create_message(self):
        cpu_points = self._cpu_usage_points
        self._cpu_usage_points = []
        return {"type": "cpu-usage", "cpu-usages": cpu_points}

    def send_message(self, urgent=False):
        message = self.create_message()
        if len(message["cpu-usages"]):
            self.registry.broker.send_message(message,
                                              self._session_id,
                                              urgent=urgent)

    def exchange(self, urgent=False):
        self.registry.broker.call_if_accepted("cpu-usage", self.send_message,
                                              urgent)

    def run(self):
        self._monitor.ping()
        new_timestamp = int(self._create_time())
        new_cpu_usage = self._get_cpu_usage(self._stat_file)

        step_data = None
        if new_cpu_usage is not None:
            step_data = self._accumulate(new_timestamp, new_cpu_usage,
                                         ACCUMULATOR_KEY)
        if step_data is not None:
            self._cpu_usage_points.append(step_data)

    def _get_cpu_usage(self, stat_file):
        """
        This method computes the CPU usage from C{stat_file}.
        """
        result = None
        try:
            with open(stat_file, "r") as f:
                # The first line of the file is the CPU information aggregated
                # across cores.
                stat = f.readline()
        except IOError:
            logging.error(
                "Could not open %s for reading, "
                "CPU usage cannot be computed.", stat_file)
            return None

        # The cpu line is composed of:
        # ["cpu", user, nice, system, idle, iowait, irq, softirq, steal, guest,
        # guest nice]
        # The fields are a sum of USER_HZ quantums since boot spent in each
        # "category". We need to keep track of what the previous measure was,
        # since the current CPU usage will be calculated on the delta between
        # the previous measure and the current measure.
        # split() discards the trailing newline; [1:] drops the "cpu" label.
        fields = stat.split()[1:]
        idle = int(fields[3])
        value = sum(int(i) for i in fields)

        previous = self._persist.get(LAST_MESURE_KEY)
        if previous is not None and value != previous[0]:
            delta = value - previous[0]
            if delta >= 0:
                result = (delta - idle + previous[1]) / float(delta)

        self._persist.set(LAST_MESURE_KEY, (value, idle))

        return result
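
To make the delta computation in _get_cpu_usage() concrete, here is a small worked example with invented USER_HZ counters, applying the same formula used above between two consecutive reads of /proc/stat.

# Worked example of the usage formula, with made-up counter values.
previous_total, previous_idle = 1000, 600   # persisted from the previous run
current_total, current_idle = 1100, 680     # summed from the current cpu line

delta = current_total - previous_total               # 100 ticks elapsed
busy = delta - current_idle + previous_idle          # 100 - 80 idle ticks = 20
print(busy / float(delta))                           # 0.2, i.e. 20% CPU usage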
Example 7
class SwiftUsage(MonitorPlugin):
    """Plugin reporting Swift cluster usage.

    This only works if the client runs on a Swift node.  It requires the
    'python-swift' package to be installed (which is installed on swift nodes).

    """

    persist_name = "swift-usage"
    scope = "storage"

    def __init__(self,
                 interval=30,
                 monitor_interval=60 * 60,
                 create_time=time.time,
                 swift_ring="/etc/swift/object.ring.gz"):
        self._interval = interval
        self._monitor_interval = monitor_interval
        self._create_time = create_time
        self._swift_ring = swift_ring  # To discover Recon host/port

        self._has_swift = has_swift
        self._swift_usage_points = []
        self.active = True

    def register(self, registry):
        super(SwiftUsage, self).register(registry)
        self._accumulate = Accumulator(self._persist, self._interval)
        self._monitor = CoverageMonitor(self.run_interval,
                                        0.8,
                                        "Swift device usage snapshot",
                                        create_time=self._create_time)
        self.registry.reactor.call_every(self._monitor_interval,
                                         self._monitor.log)

        self.registry.reactor.call_on("stop", self._monitor.log, priority=2000)
        self.call_on_accepted("swift-usage", self.send_message, True)

    def create_message(self):
        usage_points = self._swift_usage_points
        self._swift_usage_points = []
        if usage_points:
            return {"type": "swift-usage", "data-points": usage_points}

    def send_message(self, urgent=False):
        message = self.create_message()
        if message:
            self.registry.broker.send_message(message,
                                              self._session_id,
                                              urgent=urgent)

    def exchange(self, urgent=False):
        self.registry.broker.call_if_accepted("swift-usage", self.send_message,
                                              urgent)

    def run(self):
        if not self._should_run():
            return

        self._monitor.ping()

        host = self._get_recon_host()
        deferred = threads.deferToThread(self._perform_recon_call, host)
        deferred.addCallback(self._handle_usage)
        return deferred

    def _should_run(self):
        """Return whether the plugin should run."""
        if not self.active:
            return False

        if not self._has_swift:
            logging.info("This machine does not appear to be a Swift machine. "
                         "Deactivating plugin.")
            self.active = False
            return False

        # Check for object ring config file.
        # If it is not present, this is not a Swift machine or Swift is not
        # set up yet.
        if not os.path.exists(self._swift_ring):
            return False

        return True

    def _get_recon_host(self):
        """Return a tuple with Recon (host, port)."""
        local_ips = self._get_local_ips()
        ring = Ring(self._swift_ring)
        for dev in ring.devs:
            if dev and dev["ip"] in local_ips:
                return dev["ip"], dev["port"]

    def _get_local_ips(self):
        """Return a list of IP addresses for local devices."""
        return [device["ip_address"] for device in get_active_device_info()]

    def _perform_recon_call(self, host):
        """Get usage information from Swift Recon service."""
        if not host:
            return

        scout = Scout("diskusage")
        # Perform the actual call
        _, disk_usage, code = scout.scout(host)
        if code == 200:
            return disk_usage

    def _handle_usage(self, disk_usage):
        # The Recon call may return None (no matching host was found or the
        # request failed); in that case there is nothing to record.
        if not disk_usage:
            return

        timestamp = int(self._create_time())

        devices = set()
        for usage in disk_usage:
            if not usage["mounted"]:
                continue

            device = usage["device"]
            devices.add(device)

            step_values = []
            for key in ("size", "avail", "used"):
                # Store values in a tree so it is easy to delete all values
                # for a device.
                persist_key = "usage.%s.%s" % (device, key)
                step_value = self._accumulate(timestamp, usage[key],
                                              persist_key)
                step_values.append(step_value)

            if all(step_values):
                point = [step_value[0], device]  # accumulated timestamp
                point.extend(int(step_value[1]) for step_value in step_values)
                self._swift_usage_points.append(tuple(point))

        # Update device list and remove usage for devices that no longer exist.
        current_devices = set(self._persist.get("devices", ()))
        for device in current_devices - devices:
            self._persist.remove("usage.%s" % device)
        self._persist.set("devices", list(devices))