Esempio n. 1
0
    def check_status_lines(cs):
        """
        Build the human-readable status lines for a single check.

        The result starts with the check name and a dashed underline. When the
        check class failed to initialize, the error (and its traceback, if one
        was recorded) follows. Otherwise there is one entry per instance
        status, then the collected totals, the optional run statistics, the
        optional library versions and a trailing empty separator line.

        :param cs: The check status to render. The attributes read here are
        ``name``, ``init_failed_error``, ``init_failed_traceback``,
        ``instance_statuses``, ``metric_count``, ``event_count``,
        ``service_check_count``, ``check_stats`` and ``library_versions``.
        :return: A list of strings, one per output line, on every code path
        (including the initialization-failure path).
        """
        check_lines = ["  " + cs.name, "  " + "-" * len(cs.name)]
        if cs.init_failed_error:
            check_lines.append(
                "    - initialize check class [%s]: %s" % (style(STATUS_ERROR, "red"), repr(cs.init_failed_error))
            )
            if cs.init_failed_traceback:
                check_lines.extend("      " + line for line in cs.init_failed_traceback.split("\n"))
        else:
            for s in cs.instance_statuses:
                # Pick the status color; an error overrides a warning.
                c = "green"
                if s.has_warnings():
                    c = "yellow"
                if s.has_error():
                    c = "red"
                line = "    - instance #%s [%s]" % (s.instance_id, style(s.status, c))
                if s.has_error():
                    line += u": %s" % s.error
                if s.metric_count is not None:
                    line += " collected %s metrics" % s.metric_count
                if s.instance_check_stats is not None:
                    line += " Last run duration: %s" % s.instance_check_stats.get("run_time")

                check_lines.append(line)

                if s.has_warnings():
                    for warning in s.warnings:
                        warn = warning.split("\n")
                        if not warn:
                            continue
                        # The first line carries the "Warning" label; any
                        # remaining lines are indented continuation lines.
                        check_lines.append(u"        %s: %s" % (style("Warning", "yellow"), warn[0]))
                        check_lines.extend(u"        %s" % extra for extra in warn[1:])
                if s.traceback is not None:
                    check_lines.extend("      " + line for line in s.traceback.split("\n"))

            check_lines += [
                "    - Collected %s metric%s, %s event%s & %s service check%s"
                % (
                    cs.metric_count,
                    plural(cs.metric_count),
                    cs.event_count,
                    plural(cs.event_count),
                    cs.service_check_count,
                    plural(cs.service_check_count),
                )
            ]

            if cs.check_stats is not None:
                check_lines += ["    - Stats: %s" % pretty_statistics(cs.check_stats)]

            if cs.library_versions is not None:
                check_lines += ["    - Dependencies:"]
                for library, version in cs.library_versions.iteritems():
                    check_lines += ["        - %s: %s" % (library, version)]

            check_lines += [""]

        # Return at function level: the return used to live inside the else
        # branch, so an initialization failure fell off the end of the
        # function and yielded None instead of the error lines built above.
        return check_lines
Esempio n. 2
0
                    i,
                    check_status.STATUS_ERROR,
                    error=str(e),
                    tb=traceback.format_exc())
            finally:
                self._roll_up_instance_metadata()

            instance_statuses.append(instance_status)

        if self.in_developer_mode and self.name != AGENT_METRICS_CHECK_NAME:
            try:
                after = AgentCheck._collect_internal_stats()
                self._set_internal_profiling_stats(before, after)
                log.info("\n \t %s %s" %
                         (self.name,
                          pretty_statistics(self._internal_profiling_stats)))
            except Exception:  # It's fine if we can't collect stats for the run, just log and proceed
                self.log.debug(
                    "Failed to collect Agent Stats after check {0}".format(
                        self.name))

        return instance_statuses

    def check(self, instance):
        """
        Run the check against a single instance.

        Concrete check classes are expected to override this method; the
        implementation here always raises.

        :param instance: A dict with the instance information. Its contents
        depend on your config structure.
        :raises NotImplementedError: always, unless overridden.
        """
        raise NotImplementedError
Esempio n. 3
0
            except Exception, e:
                self.log.exception("Check '%s' instance #%s failed" % (self.name, i))
                instance_status = check_status.InstanceStatus(
                    i, check_status.STATUS_ERROR,
                    error=str(e), tb=traceback.format_exc()
                )
            finally:
                self._roll_up_instance_metadata()

            instance_statuses.append(instance_status)

        if self.in_developer_mode and self.name != AGENT_METRICS_CHECK_NAME:
            try:
                after = AgentCheck._collect_internal_stats()
                self._set_internal_profiling_stats(before, after)
                log.info("\n \t %s %s" % (self.name, pretty_statistics(self._internal_profiling_stats)))
            except Exception:  # It's fine if we can't collect stats for the run, just log and proceed
                self.log.debug("Failed to collect Agent Stats after check {0}".format(self.name))

        return instance_statuses

    def check(self, instance):
        """
        Run the check against a single instance.

        Concrete check classes are expected to override this method; the
        implementation here always raises.

        :param instance: A dict with the instance information. Its contents
        depend on your config structure.
        :raises NotImplementedError: always, unless overridden.
        """
        raise NotImplementedError

    def stop(self):
Esempio n. 4
0
    def run(self):
        """
        Execute the check once for every configured instance.

        An instance that ran less than its ``min_collection_interval`` ago is
        skipped and contributes no status. In developer mode the duration of
        each instance run is recorded, and (except for the agent metrics
        check itself) internal stats are collected before and after the run.

        :return: A list with one ``check_status.InstanceStatus`` per instance
        that was actually run (OK, WARNING or ERROR).
        """
        # Snapshot internal stats up front when profiling applies to this check
        before = None
        if self.in_developer_mode and self.name != AGENT_METRICS_CHECK_NAME:
            try:
                before = AgentCheck._collect_internal_stats()
            except Exception:  # Best effort: a missing snapshot must not block the run
                self.log.debug(
                    "Failed to collect Agent Stats before check {0}".format(self.name))

        instance_statuses = []
        for idx, instance_conf in enumerate(self.instances):
            try:
                interval = instance_conf.get(
                    'min_collection_interval', self.min_collection_interval)

                started = time.time()
                if started - self.last_collection_time[idx] < interval:
                    # Too soon: skip this instance (the finally clause still runs)
                    self.log.debug(
                        "Not running instance #{0} of check {1} as it ran less than {2}s ago"
                        .format(idx, self.name, interval))
                    continue

                self.last_collection_time[idx] = started

                # Only time the run in developer mode
                timer_start = timeit.default_timer() if self.in_developer_mode else None
                self.check(copy.deepcopy(instance_conf))

                run_stats = None
                if timer_start is not None:
                    run_stats = {'run_time': timeit.default_timer() - timer_start}

                status_kwargs = {'instance_check_stats': run_stats}
                if self.has_warnings():
                    status_code = check_status.STATUS_WARNING
                    status_kwargs['warnings'] = self.get_warnings()
                else:
                    status_code = check_status.STATUS_OK
                status = check_status.InstanceStatus(idx, status_code, **status_kwargs)
            except Exception as err:
                self.log.exception(
                    "Check '%s' instance #%s failed" % (self.name, idx))
                status = check_status.InstanceStatus(
                    idx,
                    check_status.STATUS_ERROR,
                    error=str(err),
                    tb=traceback.format_exc())
            finally:
                self._roll_up_instance_metadata()

            instance_statuses.append(status)

        if self.in_developer_mode and self.name != AGENT_METRICS_CHECK_NAME:
            try:
                after_stats = AgentCheck._collect_internal_stats()
                self._set_internal_profiling_stats(before, after_stats)
                summary = pretty_statistics(self._internal_profiling_stats)
                log.info("\n \t %s %s" % (self.name, summary))
            except Exception:  # Best effort: profiling failures only get a debug log
                self.log.debug(
                    "Failed to collect Agent Stats after check {0}".format(self.name))

        return instance_statuses
Esempio n. 5
0
    def run(self):
        """
        Execute the check once for every configured instance.

        The minimum collection interval is read from the instance, falling
        back to ``init_config`` and then ``DEFAULT_MIN_COLLECTION_INTERVAL``.
        An instance that ran more recently than that interval is skipped and
        contributes no status. In developer mode the duration of each
        instance run is recorded, and (except for the agent metrics check
        itself) internal stats are collected before and after the run.

        :return: A list with one ``check_status.InstanceStatus`` per instance
        that was actually run (OK, WARNING or ERROR).
        """
        # Snapshot internal stats up front when profiling applies to this check
        before = None
        if self.in_developer_mode and self.name != AGENT_METRICS_CHECK_NAME:
            try:
                before = AgentCheck._collect_internal_stats()
            except Exception:  # Best effort: a missing snapshot must not block the run
                self.log.debug("Failed to collect Agent Stats before check {0}".format(self.name))

        instance_statuses = []
        for idx, instance_conf in enumerate(self.instances):
            try:
                interval = instance_conf.get(
                    "min_collection_interval",
                    self.init_config.get("min_collection_interval", self.DEFAULT_MIN_COLLECTION_INTERVAL),
                )
                started = time.time()
                if started - self.last_collection_time[idx] < interval:
                    # Too soon: skip this instance (the finally clause still runs)
                    skip_msg = "Not running instance #{0} of check {1} as it ran less than {2}s ago".format(
                        idx, self.name, interval
                    )
                    self.log.debug(skip_msg)
                    continue

                self.last_collection_time[idx] = started

                # Only time the run in developer mode
                timer_start = timeit.default_timer() if self.in_developer_mode else None
                self.check(copy.deepcopy(instance_conf))

                run_stats = None
                if timer_start is not None:
                    run_stats = {"run_time": timeit.default_timer() - timer_start}

                status_kwargs = {"instance_check_stats": run_stats}
                if self.has_warnings():
                    status_code = check_status.STATUS_WARNING
                    status_kwargs["warnings"] = self.get_warnings()
                else:
                    status_code = check_status.STATUS_OK
                status = check_status.InstanceStatus(idx, status_code, **status_kwargs)
            except Exception as err:
                self.log.exception("Check '%s' instance #%s failed" % (self.name, idx))
                status = check_status.InstanceStatus(
                    idx, check_status.STATUS_ERROR, error=str(err), tb=traceback.format_exc()
                )
            finally:
                self._roll_up_instance_metadata()

            instance_statuses.append(status)

        if self.in_developer_mode and self.name != AGENT_METRICS_CHECK_NAME:
            try:
                after_stats = AgentCheck._collect_internal_stats()
                self._set_internal_profiling_stats(before, after_stats)
                summary = pretty_statistics(self._internal_profiling_stats)
                log.info("\n \t %s %s" % (self.name, summary))
            except Exception:  # Best effort: profiling failures only get a debug log
                self.log.debug("Failed to collect Agent Stats after check {0}".format(self.name))

        return instance_statuses
Esempio n. 6
0
    def body_lines(self):
        """
        Build the body of the status report as a list of text lines.

        Sections are emitted in this order: Clocks, Paths, Hostnames, Checks,
        Service metadata and Emitters. Tracebacks in the Checks section are
        only included when ``self.verbose`` is truthy.

        :return: A list of strings, one per output line.
        """
        # Host metadata entries are only displayed when their key contains
        # one of these items
        metadata_whitelist = [
            'hostname',
            'fqdn',
            'ipv4',
            'instance-id'
        ]

        lines = [
            'Clocks',
            '======',
            ''
        ]
        # Any failure while getting/formatting the NTP info is reported inline
        # instead of aborting the whole report
        try:
            ntp_offset, ntp_styles = get_ntp_info()
            lines.append('  ' + style('NTP offset', *ntp_styles) + ': ' + style('%s s' % round(ntp_offset, 4), *ntp_styles))
        except Exception as e:
            lines.append('  NTP offset: Unknown (%s)' % str(e))
        lines.append('  System UTC time: ' + datetime.datetime.utcnow().__str__())
        lines.append('')

        # Paths to checks.d/conf.d
        lines += [
            'Paths',
            '=====',
            ''
        ]

        osname = config.get_os()

        # A missing directory is displayed as 'Not found' rather than raising
        try:
            confd_path = config.get_confd_path(osname)
        except config.PathNotFound:
            confd_path = 'Not found'

        try:
            checksd_path = config.get_checksd_path(osname)
        except config.PathNotFound:
            checksd_path = 'Not found'

        lines.append('  conf.d: ' + confd_path)
        lines.append('  checks.d: ' + checksd_path)
        lines.append('')

        # Hostnames
        lines += [
            'Hostnames',
            '=========',
            ''
        ]

        if not self.host_metadata:
            lines.append("  No host information available yet.")
        else:
            for key, host in self.host_metadata.iteritems():
                # Emit each entry at most once, on the first whitelist match
                for whitelist_item in metadata_whitelist:
                    if whitelist_item in key:
                        lines.append("  " + key + ": " + host)
                        break

        lines.append('')

        # Checks.d Status
        lines += [
            'Checks',
            '======',
            ''
        ]
        # Statuses stored on this object are followed by the JMX ones
        check_statuses = self.check_statuses + get_jmx_status()
        if not check_statuses:
            lines.append("  No checks have run yet.")
        else:
            for cs in check_statuses:
                # Title is "name (version)"; the underline covers the name,
                # the 3 extra characters " (" and ")", and the version
                check_lines = [
                    '  ' + cs.name + ' ({})'.format(cs.check_version),
                    '  ' + '-' * (len(cs.name) + 3 + len(cs.check_version))
                ]
                if cs.init_failed_error:
                    check_lines.append("    - initialize check class [%s]: %s" %
                                       (style(STATUS_ERROR, 'red'),
                                        repr(cs.init_failed_error)))
                    if self.verbose and cs.init_failed_traceback:
                        check_lines.extend('      ' + line for line in
                                           cs.init_failed_traceback.split('\n'))
                else:
                    for s in cs.instance_statuses:
                        # Status color: an error overrides a warning
                        c = 'green'
                        if s.has_warnings():
                            c = 'yellow'
                        if s.has_error():
                            c = 'red'
                        line = "    - instance #%s [%s]" % (
                            s.instance_id, style(s.status, c))
                        if s.has_error():
                            line += u": %s" % s.error
                        if s.metric_count is not None:
                            line += " collected %s metrics" % s.metric_count
                        if s.instance_check_stats is not None:
                            line += " Last run duration: %s" % s.instance_check_stats.get('run_time')

                        check_lines.append(line)

                        if s.has_warnings():
                            for warning in s.warnings:
                                # First line gets the "Warning" label, the
                                # remaining lines are continuation lines
                                warn = warning.split('\n')
                                if not len(warn):
                                    continue
                                check_lines.append(u"        %s: %s" %
                                                   (style("Warning", 'yellow'), warn[0]))
                                check_lines.extend(u"        %s" % l for l in
                                                   warn[1:])
                        if self.verbose and s.traceback is not None:
                            check_lines.extend('      ' + line for line in
                                               s.traceback.split('\n'))

                    check_lines += [
                        "    - Collected %s metric%s, %s event%s & %s service check%s" % (
                            cs.metric_count, plural(cs.metric_count),
                            cs.event_count, plural(cs.event_count),
                            cs.service_check_count, plural(cs.service_check_count)),
                    ]

                    if cs.check_stats is not None:
                        check_lines += [
                            "    - Stats: %s" % pretty_statistics(cs.check_stats)
                        ]

                    if cs.library_versions is not None:
                        check_lines += [
                            "    - Dependencies:"]
                        for library, version in cs.library_versions.iteritems():
                            check_lines += [
                                "        - %s: %s" % (library, version)]

                    check_lines += [""]

                lines += check_lines

        # Metadata status
        # NOTE(review): the config lookup is commented out and the flag is
        # hard-coded to 1, so the section below is always emitted -- confirm
        # this is intentional
        metadata_enabled = 1#_is_affirmative(get_config().get('display_service_metadata', False))

        if metadata_enabled:
            lines += [
                "",
                "Service metadata",
                "================",
                ""
            ]
            if not check_statuses:
                lines.append("  No checks have run yet.")
            else:
                meta_lines = []
                for cs in check_statuses:
                    # Check title
                    check_line = [
                        '  ' + cs.name,
                        '  ' + '-' * len(cs.name)
                    ]
                    instance_lines = []
                    for i, meta in enumerate(cs.service_metadata):
                        # Instances without metadata are left out
                        if not meta:
                            continue
                        instance_lines += ["    - instance #%s:" % i]
                        for k, v in meta.iteritems():
                            instance_lines += ["        - %s: %s" % (k, v)]
                    # The title is only emitted for checks that have metadata
                    if instance_lines:
                        check_line += instance_lines
                        meta_lines += check_line
                if meta_lines:
                    lines += meta_lines
                else:
                    lines.append("  No metadata were collected.")

        # Emitter status
        lines += [
            "",
            "Emitters",
            "========",
            ""
        ]
        if not self.emitter_statuses:
            lines.append("  No emitters have run yet.")
        else:
            for es in self.emitter_statuses:
                c = 'green'
                if es.has_error():
                    c = 'red'
                line = "  - %s [%s]" % (es.name, style(es.status, c))
                # The error text is appended for any status other than OK
                if es.status != STATUS_OK:
                    line += ": %s" % es.error
                lines.append(line)

        return lines
Esempio n. 7
0
    def check_status_lines(cs):
        """
        Build the human-readable status lines for a single check.

        The result starts with a "name (version)" title and a dashed
        underline. When the check class failed to initialize, the error (and
        its traceback, if one was recorded) follows. Otherwise there is one
        entry per instance status, then the collected totals, the optional
        run statistics, the optional library versions and a trailing empty
        separator line.

        :param cs: The check status to render. The attributes read here are
        ``name``, ``check_version``, ``init_failed_error``,
        ``init_failed_traceback``, ``instance_statuses``, ``metric_count``,
        ``event_count``, ``service_check_count``, ``check_stats`` and
        ``library_versions``.
        :return: A list of strings, one per output line, on every code path
        (including the initialization-failure path).
        """
        # The underline covers the name, the 3 extra characters " (" and ")",
        # and the version
        check_lines = [
            '  ' + cs.name + ' ({})'.format(cs.check_version),
            '  ' + '-' * (len(cs.name) + 3 + len(cs.check_version))
        ]
        if cs.init_failed_error:
            check_lines.append("    - initialize check class [%s]: %s" %
                               (style(STATUS_ERROR, 'red'),
                                repr(cs.init_failed_error)))
            if cs.init_failed_traceback:
                check_lines.extend('      ' + line for line in
                                   cs.init_failed_traceback.split('\n'))
        else:
            for s in cs.instance_statuses:
                # Pick the status color; an error overrides a warning.
                c = 'green'
                if s.has_warnings():
                    c = 'yellow'
                if s.has_error():
                    c = 'red'
                line = "    - instance #%s [%s]" % (
                    s.instance_id, style(s.status, c))
                if s.has_error():
                    line += u": %s" % s.error
                if s.metric_count is not None:
                    line += " collected %s metrics" % s.metric_count
                if s.instance_check_stats is not None:
                    line += " Last run duration: %s" % s.instance_check_stats.get('run_time')

                check_lines.append(line)

                if s.has_warnings():
                    for warning in s.warnings:
                        warn = warning.split('\n')
                        if not warn:
                            continue
                        # The first line carries the "Warning" label; any
                        # remaining lines are indented continuation lines.
                        check_lines.append(u"        %s: %s" %
                                           (style("Warning", 'yellow'), warn[0]))
                        check_lines.extend(u"        %s" % extra for extra in
                                           warn[1:])
                if s.traceback is not None:
                    check_lines.extend('      ' + line for line in
                                       s.traceback.split('\n'))

            check_lines += [
                "    - Collected %s metric%s, %s event%s & %s service check%s" % (
                    cs.metric_count, plural(cs.metric_count),
                    cs.event_count, plural(cs.event_count),
                    cs.service_check_count, plural(cs.service_check_count)),
            ]

            if cs.check_stats is not None:
                check_lines += [
                    "    - Stats: %s" % pretty_statistics(cs.check_stats)
                ]

            if cs.library_versions is not None:
                check_lines += [
                    "    - Dependencies:"]
                for library, version in cs.library_versions.iteritems():
                    check_lines += ["        - %s: %s" % (library, version)]

            check_lines += [""]

        # Return at function level: the return used to live inside the else
        # branch, so an initialization failure fell off the end of the
        # function and yielded None instead of the error lines built above.
        return check_lines
Esempio n. 8
0
    def body_lines(self):
        """
        Build the body of the status report as a list of text lines.

        Sections are emitted in this order: Clocks, Paths, Hostnames, Checks,
        Service metadata (only when the ``display_service_metadata`` config
        value is affirmative) and Emitters. Tracebacks in the Checks section
        are only included when ``self.verbose`` is truthy.

        :return: A list of strings, one per output line.
        """
        # Host metadata entries are only displayed when their key contains
        # one of these items
        metadata_whitelist = [
            'hostname',
            'fqdn',
            'ipv4',
            'instance-id'
        ]

        lines = [
            'Clocks',
            '======',
            ''
        ]
        # Any failure while getting/formatting the NTP info is reported inline
        # instead of aborting the whole report
        try:
            ntp_offset, ntp_styles = get_ntp_info()
            lines.append('  ' + style('NTP offset', *ntp_styles) + ': ' + style('%s s' % round(ntp_offset, 4), *ntp_styles))
        except Exception as e:
            lines.append('  NTP offset: Unknown (%s)' % str(e))
        lines.append('  System UTC time: ' + datetime.datetime.utcnow().__str__())
        lines.append('')

        # Paths to checks.d/conf.d
        lines += [
            'Paths',
            '=====',
            ''
        ]

        osname = config.get_os()

        # A missing directory is displayed as 'Not found' rather than raising
        try:
            confd_path = config.get_confd_path(osname)
        except config.PathNotFound:
            confd_path = 'Not found'

        try:
            checksd_path = config.get_checksd_path(osname)
        except config.PathNotFound:
            checksd_path = 'Not found'

        lines.append('  conf.d: ' + confd_path)
        lines.append('  checks.d: ' + checksd_path)
        lines.append('')

        # Hostnames
        lines += [
            'Hostnames',
            '=========',
            ''
        ]

        if not self.host_metadata:
            lines.append("  No host information available yet.")
        else:
            for key, host in self.host_metadata.iteritems():
                # Emit each entry at most once, on the first whitelist match
                for whitelist_item in metadata_whitelist:
                    if whitelist_item in key:
                        lines.append("  " + key + ": " + host)
                        break

        lines.append('')

        # Checks.d Status
        lines += [
            'Checks',
            '======',
            ''
        ]
        # Statuses stored on this object are followed by the JMX ones
        check_statuses = self.check_statuses + get_jmx_status()
        if not check_statuses:
            lines.append("  No checks have run yet.")
        else:
            for cs in check_statuses:
                # Title is "name (version)"; the underline covers the name,
                # the 3 extra characters " (" and ")", and the version
                check_lines = [
                    '  ' + cs.name + ' ({})'.format(cs.check_version),
                    '  ' + '-' * (len(cs.name) + 3 + len(cs.check_version))
                ]
                if cs.init_failed_error:
                    check_lines.append("    - initialize check class [%s]: %s" %
                                       (style(STATUS_ERROR, 'red'),
                                        repr(cs.init_failed_error)))
                    if self.verbose and cs.init_failed_traceback:
                        check_lines.extend('      ' + line for line in
                                           cs.init_failed_traceback.split('\n'))
                else:
                    for s in cs.instance_statuses:
                        # Status color: an error overrides a warning
                        c = 'green'
                        if s.has_warnings():
                            c = 'yellow'
                        if s.has_error():
                            c = 'red'
                        line = "    - instance #%s [%s]" % (
                            s.instance_id, style(s.status, c))
                        if s.has_error():
                            line += u": %s" % s.error
                        if s.metric_count is not None:
                            line += " collected %s metrics" % s.metric_count
                        if s.instance_check_stats is not None:
                            line += " Last run duration: %s" % s.instance_check_stats.get('run_time')

                        check_lines.append(line)

                        if s.has_warnings():
                            for warning in s.warnings:
                                # First line gets the "Warning" label, the
                                # remaining lines are continuation lines
                                warn = warning.split('\n')
                                if not len(warn):
                                    continue
                                check_lines.append(u"        %s: %s" %
                                                   (style("Warning", 'yellow'), warn[0]))
                                check_lines.extend(u"        %s" % l for l in
                                                   warn[1:])
                        if self.verbose and s.traceback is not None:
                            check_lines.extend('      ' + line for line in
                                               s.traceback.split('\n'))

                    check_lines += [
                        "    - Collected %s metric%s, %s event%s & %s service check%s" % (
                            cs.metric_count, plural(cs.metric_count),
                            cs.event_count, plural(cs.event_count),
                            cs.service_check_count, plural(cs.service_check_count)),
                    ]

                    if cs.check_stats is not None:
                        check_lines += [
                            "    - Stats: %s" % pretty_statistics(cs.check_stats)
                        ]

                    if cs.library_versions is not None:
                        check_lines += [
                            "    - Dependencies:"]
                        for library, version in cs.library_versions.iteritems():
                            check_lines += [
                                "        - %s: %s" % (library, version)]

                    check_lines += [""]

                lines += check_lines

        # Metadata status: the section is opt-in through the
        # 'display_service_metadata' config value (treated as off when absent)
        metadata_enabled = _is_affirmative(get_config().get('display_service_metadata', False))

        if metadata_enabled:
            lines += [
                "",
                "Service metadata",
                "================",
                ""
            ]
            if not check_statuses:
                lines.append("  No checks have run yet.")
            else:
                meta_lines = []
                for cs in check_statuses:
                    # Check title
                    check_line = [
                        '  ' + cs.name,
                        '  ' + '-' * len(cs.name)
                    ]
                    instance_lines = []
                    for i, meta in enumerate(cs.service_metadata):
                        # Instances without metadata are left out
                        if not meta:
                            continue
                        instance_lines += ["    - instance #%s:" % i]
                        for k, v in meta.iteritems():
                            instance_lines += ["        - %s: %s" % (k, v)]
                    # The title is only emitted for checks that have metadata
                    if instance_lines:
                        check_line += instance_lines
                        meta_lines += check_line
                if meta_lines:
                    lines += meta_lines
                else:
                    lines.append("  No metadata were collected.")

        # Emitter status
        lines += [
            "",
            "Emitters",
            "========",
            ""
        ]
        if not self.emitter_statuses:
            lines.append("  No emitters have run yet.")
        else:
            for es in self.emitter_statuses:
                c = 'green'
                if es.has_error():
                    c = 'red'
                line = "  - %s [%s]" % (es.name, style(es.status, c))
                # The error text is appended for any status other than OK
                if es.status != STATUS_OK:
                    line += ": %s" % es.error
                lines.append(line)

        return lines
Esempio n. 9
0
            except Exception, e:
                self.log.exception("Check '%s' instance #%s failed" % (self.name, i))
                instance_status = check_status.InstanceStatus(
                    i, check_status.STATUS_ERROR,
                    error=str(e), tb=traceback.format_exc()
                )
            finally:
                self._roll_up_instance_metadata()

            instance_statuses.append(instance_status)

        if self.in_developer_mode and self.name != AGENT_METRICS_CHECK_NAME:
            try:
                after = AgentCheck._collect_internal_stats()
                self._set_internal_profiling_stats(before, after)
                log.info("\n \t %s %s" % (self.name, pretty_statistics(self._internal_profiling_stats)))
            except Exception:  # It's fine if we can't collect stats for the run, just log and proceed
                self.log.debug("Failed to collect Agent Stats after check {0}".format(self.name))

        return instance_statuses

    def check(self, instance):
        """
        Run the check against a single instance.

        Concrete check classes are expected to override this method; the
        implementation here always raises.

        :param instance: A dict with the instance information. Its contents
        depend on your config structure.
        :raises NotImplementedError: always, unless overridden.
        """
        raise NotImplementedError

    def stop(self):
Esempio n. 10
0
                            check_lines.extend(
                                '      ' + line
                                for line in s.traceback.split('\n'))

                    check_lines += [
                        "    - Collected %s metric%s, %s event%s & %s service check%s"
                        % (cs.metric_count, plural(
                            cs.metric_count), cs.event_count,
                           plural(cs.event_count), cs.service_check_count,
                           plural(cs.service_check_count)),
                    ]

                    if cs.check_stats is not None:
                        check_lines += [
                            "    - Stats: %s" %
                            pretty_statistics(cs.check_stats)
                        ]

                    if cs.library_versions is not None:
                        check_lines += ["    - Dependencies:"]
                        for library, version in cs.library_versions.iteritems(
                        ):
                            check_lines += [
                                "        - %s: %s" % (library, version)
                            ]

                    check_lines += [""]

                lines += check_lines

        # Metadata status