Exemple #1
0
class TestMetric(unittest.TestCase):
    """Write/read round-trip tests for ``Metric`` databases kept under ``/tmp/dp``."""

    def setUp(self):
        """Create the two metrics the tests operate on."""
        self.dp1 = Metric("/tmp/dp", "test.cpu")
        self.dp2 = Metric("/tmp/dp", "test.memory")

    def tearDown(self):
        """Call ``purge()`` on both metrics once a test has finished."""
        for metric in (self.dp1, self.dp2):
            metric.purge()

    def test_validate_archives(self):
        """The archive list of a metric must be accepted by whisper's validator."""
        archives = self.dp1.archives
        self.assertTrue(whisper.validateArchiveList(archives))

    def test_update(self):
        """Write one random point per second for the last 1000 seconds, then check the fetched window."""
        now = int(time.time())
        window_start = now - 1000
        for timestamp in range(window_start, now):
            self.dp1.update(random.randint(0, 100), timestamp)

        data_range, values = self.dp1.fetch(window_start, now)
        self.assertEqual(1000, len(values))
        self.assertEqual(data_range[0], now - 999)
        self.assertEqual(data_range[1], now + 1)

    def test_fetch(self):
        """Values read back must match the values written, shifted by one slot."""
        points = [random.randint(0, 100) for _ in range(1000)]
        now = int(time.time())
        for timestamp, point in zip(range(now - 1000, now), points):
            self.dp1.update(point, timestamp)

        _, values = self.dp1.fetch(now - 1000, now)
        # The first written point and the last fetched slot are excluded from the comparison.
        expected = points[1:]
        actual = values[0:-1]
        self.assertEqual(expected, actual)
        self.assertEqual(sum(expected), sum(actual))
class TestMetric(unittest.TestCase):
    """Exercises updating and fetching ``Metric`` series stored under ``/tmp/dp``."""

    def setUp(self):
        """Build the ``test.cpu`` and ``test.memory`` metrics used as fixtures."""
        self.dp1 = Metric("/tmp/dp", "test.cpu")
        self.dp2 = Metric("/tmp/dp", "test.memory")

    def tearDown(self):
        """Invoke ``purge()`` on each fixture metric."""
        self.dp1.purge()
        self.dp2.purge()

    def test_validate_archives(self):
        """``whisper.validateArchiveList`` must return a truthy result for the metric's archives."""
        is_valid = whisper.validateArchiveList(self.dp1.archives)
        self.assertTrue(is_valid)

    def test_update(self):
        """Store 1000 consecutive one-second samples and verify the size and bounds of the fetch."""
        end = int(time.time())
        begin = end - 1000
        for offset in range(1000):
            self.dp1.update(random.randint(0, 100), begin + offset)

        fetched = self.dp1.fetch(begin, end)
        data_range = fetched[0]
        values = fetched[1]
        self.assertEqual(1000, len(values))
        self.assertEqual(data_range[0], end - 999)
        self.assertEqual(data_range[1], end + 1)

    def test_fetch(self):
        """Store 1000 known samples and verify they are read back unchanged (offset by one slot)."""
        points = [random.randint(0, 100) for _ in range(0, 1000)]
        end = int(time.time())
        begin = end - 1000
        for offset, point in enumerate(points):
            self.dp1.update(point, begin + offset)

        fetched = self.dp1.fetch(begin, end)
        values = fetched[1]
        # Compare everything except the first written point and the last fetched slot.
        self.assertEqual(points[1:], values[0:-1])
        self.assertEqual(sum(points[1:]), sum(values[0:-1]))
Exemple #3
0
    def __init__(self, node_id, config):
        """
        Set up the health metric databases for the given node.

        All metrics live beneath ``<data_dir>/health/<node_id[0]>/<str(node_id[1])>``, where ``data_dir`` is
        read from the "metrics" section of ``config``. The databases use the "whisper" database format from
        graphite, so they handle long-term storage with decreasing resolution and never grow or shrink once
        created.

        :param node_id: The node to create this health object for. It is indexed as ``node_id[0]`` and
                        ``node_id[1]`` to build the storage path.
        :param config: Configuration object; ``config.get("metrics", "data_dir")`` supplies the base directory.
        """
        data_dir = config.get("metrics", "data_dir")
        self.path = os.path.join(data_dir, "health", node_id[0], str(node_id[1]))
        self.node_id = node_id

        # Alert flags start cleared.
        self.memory_alert = False
        self.swap_alert = False

        # Fixed metric families: one Metric per field name listed in the module-level name lists.
        self.cpu_load = Metric(self.path, "cpu.load")
        self.mem = [Metric(self.path, "mem.%s" % field) for field in memory]
        self.swp = [Metric(self.path, "swp.%s" % field) for field in swap]
        self.cpu = [Metric(self.path, "cpu.%s" % field) for field in cpu]

        # Per-device metric families start empty.
        self.dsk_sp = {}
        self.dsk_io = {}
        self.net = {}
Exemple #4
0
class Health(object):
    """
    Records system statistics for one node into ``Metric`` databases and answers health queries on them.
    """

    def __init__(self, node_id, config):
        """
        Creates a new health object for the given node. Databases for health statistics will be created
        beneath the 'data_dir' configured in the "metrics" section of 'config'. The databases use the
        "whisper" database format from graphite, so they automatically handle long-term storage with
        decreasing resolution. Once the databases are created they never grow or shrink, regardless of the
        amount of data stored.

        :param node_id: The node to create this health object for. ``node_id[0]`` and ``node_id[1]`` are used
                        as path components.
        :param config: Configuration object; ``config.get("metrics", "data_dir")`` supplies the data directory
                       to use for the health stats.
        """
        self.path = os.path.join(config.get("metrics", "data_dir"), "health", node_id[0], str(node_id[1]))
        self.node_id = node_id
        self.memory_alert = False
        self.swap_alert = False

        self.cpu_load = Metric(self.path, "cpu.load")
        self.mem = [Metric(self.path, "mem.%s" % item) for item in memory]
        self.swp = [Metric(self.path, "swp.%s" % item) for item in swap]
        self.cpu = [Metric(self.path, "cpu.%s" % item) for item in cpu]
        # Per-device metrics are created lazily by capture(), keyed by device/interface name.
        self.dsk_sp = {}
        self.dsk_io = {}
        self.net = {}

    def get_metric_names(self):
        """
        Lists the dotted names of all metric database files (``*.dp``) found beneath ``self.path``.

        :return: A sorted list of metric names.
        """
        metrics = []
        for root, _dirnames, filenames in os.walk(self.path):
            for filename in filenames:
                if filename.endswith(".dp"):
                    relative = os.path.join(root, filename).replace(self.path, "")
                    # [1:-3] drops the first character (the former leading separator) and the ".dp" suffix.
                    metrics.append(relative.replace("/", ".")[1:-3])
        return sorted(metrics)

    @staticmethod
    def _record(series, values):
        """Writes values[i] into series[i] for every value, in order."""
        for i, value in enumerate(values):
            series[i].update(value)

    def capture(self):
        """
        Captures stats of the local system and writes them into the series database.

        Values are matched to metrics by position, so each module-level field-name list is expected to have
        at least as many entries as the corresponding psutil result.
        NOTE(review): an IndexError is raised if psutil returns more fields than were declared -- confirm
        the name lists match the target platform.

        :return: None
        """
        self.cpu_load.update(psutil.cpu_percent(interval=None))
        self._record(self.cpu, psutil.cpu_times())
        self._record(self.mem, psutil.virtual_memory())
        self._record(self.swp, psutil.swap_memory())

        net_io_data = psutil.net_io_counters(pernic=True)
        for name in net_io_data:
            # Create a new set of data points the first time an interface is seen.
            if name not in self.net:
                self.net[name] = [Metric(self.path, "net.io.%s.%s" % (name, item)) for item in net_io]
            self._record(self.net[name], net_io_data[name])

        dsk_io_data = psutil.disk_io_counters(perdisk=True)
        for name in dsk_io_data:
            # Create a new set of data points the first time a disk is seen.
            if name not in self.dsk_io:
                self.dsk_io[name] = [Metric(self.path, "dsk.io.%s.%s" % (name, item)) for item in disk_io]
            self._record(self.dsk_io[name], dsk_io_data[name])

        self.disk_partitions = psutil.disk_partitions()
        for disks in self.disk_partitions:
            # Turn the device path into a flat name: strip "/dev/" and join remaining components with "-".
            device = disks[0].replace("/dev/", "")
            name = "-".join([el for el in device.split("/") if el])
            # Create a new set of data points if we find a new disk.
            if name not in self.dsk_sp:
                self.dsk_sp[name] = [Metric(self.path, "dsk.space.%s.%s" % (name, item)) for item in disk_space]
            # Update the disk stats using the usage of the partition's second field (disks[1]).
            self._record(self.dsk_sp[name], psutil.disk_usage(disks[1]))

    def lookup(self, name):
        """
        Lookup a metric name and resolve it to a metric database.

        Recognised forms are ``cpu.load``, ``cpu.<field>``, ``mem.<field>``, ``swp.<field>``,
        ``dsk.space.<disk>.<field>``, ``dsk.io.<disk>.<field>`` and ``net.io.<interface>.<field>``.

        :param name: The metric name to resolve.
        :return: A data point if the name's prefix was recognised, or None
        :raises IndexError: If the name has too few dot-separated parts for its family.
        :raises ValueError: If the field is not in the family's field-name list.
        :raises KeyError: If the disk or interface has not been captured yet.
        """
        parts = name.split(".")
        family = parts[0]
        if family == "cpu":
            if parts[1] == "load":
                return self.cpu_load
            return self.cpu[cpu.index(parts[1])]
        elif family == "mem":
            return self.mem[memory.index(parts[1])]
        elif family == "swp":
            # Needed by is_swap_healthy(), which resolves "swp.percent" by name.
            return self.swp[swap.index(parts[1])]
        elif family == "dsk":
            if parts[1] == "space":
                return self.dsk_sp[parts[2]][disk_space.index(parts[3])]
            elif parts[1] == "io":
                return self.dsk_io[parts[2]][disk_io.index(parts[3])]
        elif family == "net":
            if parts[1] == "io":
                # Network metrics are stored in self.net (there is no self.net_io attribute).
                return self.net[parts[2]][net_io.index(parts[3])]

        return None

    def _values(self, dp, from_time, until_time=None):
        """Resolves dp (a metric or a metric name) and returns its fetched values with None entries removed."""
        if isinstance(dp, str):
            dp = self.lookup(dp)

        return [x for x in dp.fetch(from_time, until_time)[1] if x is not None]

    def min(self, dp, from_time, until_time=None):
        """
        Request the minimum value from the given metric.
        :param dp: The metric to check for minimum value, or its name as a string.
        :param from_time: The earliest time in the series.
        :param until_time: The latest time in the series (optional); passed through to the metric's fetch().
        :return: The minimum value from the series requested.
        :raises ValueError: If the series holds no values in the requested range.
        """
        return min(self._values(dp, from_time, until_time))

    def max(self, dp, from_time, until_time=None):
        """
        Request the maximum value from the given metric.
        :param dp: The metric to check for maximum value, or its name as a string.
        :param from_time: The earliest time in the series.
        :param until_time: The latest time in the series (optional); passed through to the metric's fetch().
        :return: The maximum value from the series requested.
        :raises ValueError: If the series holds no values in the requested range.
        """
        return max(self._values(dp, from_time, until_time))

    def avg(self, dp, from_time, until_time=None):
        """
        Request the average value for the given metric.
        :param dp: The metric to use to compute the average value, or its name as a string.
        :param from_time: The earliest time in the series.
        :param until_time: The latest time in the series (optional); passed through to the metric's fetch().
        :return: The average value from the series requested.
        :raises ZeroDivisionError: If the series holds no values in the requested range.
        """
        values = self._values(dp, from_time, until_time)
        return sum(values) / len(values)

    def is_healthy(self, dp, seconds, has_alert, low_water, high_water):
        """
        Checks to see if the given metric has been healthy over the last 'seconds' seconds. The average of the
        metric over that window is compared against a threshold: if 'has_alert' is true the average must be
        lower than 'low_water', otherwise it must be lower than 'high_water'. Returns True if it's healthy,
        False if it's not.

        :param dp: The metric to check, or its name as a string.
        :param seconds: The number of seconds of history to evaluate.
        :param has_alert: True if the metric was previously in an unhealthy state.
        :param low_water: The low water mark if has_alert is True.
        :param high_water:  The high water mark.
        :return: True if the metric is healthy, False otherwise.
        """
        percent_used = self.avg(dp, time.time() - seconds)
        threshold = low_water if has_alert else high_water
        return percent_used < threshold

    def is_memory_healthy(self, seconds, low_water, high_water):
        """
        Checks to see if memory is in a healthy state. This is a convenience for is_healthy("mem.percent")
        that also stores the outcome in 'self.memory_alert' for use by the next call.

        :param seconds: The number of seconds of history to check for health.
        :param low_water: The low water level in memory percent used.
        :param high_water: The high water level in memory percent used.
        :return: True if memory is healthy, False otherwise.
        """
        self.memory_alert = not self.is_healthy("mem.percent", seconds, self.memory_alert, low_water, high_water)
        return not self.memory_alert

    def is_swap_healthy(self, seconds, low_water, high_water):
        """
        Checks to see if swap is in a healthy state. This is a convenience for is_healthy("swp.percent")
        that also stores the outcome in 'self.swap_alert' for use by the next call.

        :param seconds: The number of seconds of history to check for health.
        :param low_water: The low water level in swap percent used.
        :param high_water: The high water level in swap percent used.
        :return: True if swap is healthy, False otherwise.
        """
        self.swap_alert = not self.is_healthy("swp.percent", seconds, self.swap_alert, low_water, high_water)
        return not self.swap_alert
Exemple #5
0
 def setUp(self):
     """Create the ``test.cpu`` and ``test.memory`` metrics under ``/tmp/dp``."""
     base_dir = "/tmp/dp"
     self.dp1 = Metric(base_dir, "test.cpu")
     self.dp2 = Metric(base_dir, "test.memory")
 def setUp(self):
     """Build the two ``Metric`` fixtures and store them as ``self.dp1`` and ``self.dp2``."""
     fixtures = (("dp1", "test.cpu"), ("dp2", "test.memory"))
     for attribute, metric_name in fixtures:
         setattr(self, attribute, Metric("/tmp/dp", metric_name))