Exemple #1
0
    def _fetch_data(self, instance):
        if 'kong_status_url' not in instance:
            raise Exception('missing "kong_status_url" value')
        tags = instance.get('tags', [])
        url = instance.get('kong_status_url')
        ssl_validation = instance.get('ssl_validation', True)

        parsed_url = urlparse(url)
        host = parsed_url.hostname
        port = parsed_url.port or 80
        service_check_name = 'kong.can_connect'
        service_check_tags = ['kong_host:%s' % host, 'kong_port:%s' % port] + tags

        try:
            self.log.debug(u"Querying URL: {0}".format(url))
            response = requests.get(url, headers=headers(self.agentConfig), verify=ssl_validation)
            self.log.debug(u"Kong status `response`: {0}".format(response))
            response.raise_for_status()
        except Exception:
            self.service_check(service_check_name, Kong.CRITICAL, tags=service_check_tags)
            raise
        else:
            if response.status_code == 200:
                self.service_check(service_check_name, Kong.OK, tags=service_check_tags)
            else:
                self.service_check(service_check_name, Kong.CRITICAL, tags=service_check_tags)

        return self._parse_json(response.content, tags)
Exemple #2
0
    def _fetch_url_data(self, url, username, password, verify, custom_headers):
        """Hit the HAProxy stats URL and return the stats lines.

        :param url: base URL; ``STATS_URL`` is appended to it.
        :param username: user name for HTTP basic auth.
        :param password: password for HTTP basic auth.
        :param verify: passed to ``requests.get`` as ``verify``.
        :param custom_headers: extra request headers; the agent headers are
            merged on top of them. ``None`` is treated as no extra headers.
        :return: the response body split into lines (decoded as UTF-8 when
            not running on Python 2 and the body exposes ``decode``).
        :raises requests.exceptions.HTTPError: on a 4xx/5xx response.
        """
        auth = (username, password)
        url = "%s%s" % (url, STATS_URL)

        # Merge into a copy: updating ``custom_headers`` in place would leak
        # the agent headers into the caller's dict on every call.
        request_headers = dict(custom_headers or {})
        request_headers.update(headers(self.agentConfig))

        self.log.debug("Fetching haproxy stats from url: %s", url)

        response = requests.get(url,
                                auth=auth,
                                headers=request_headers,
                                verify=verify,
                                timeout=self.default_integration_http_timeout)
        response.raise_for_status()

        content = response.content

        # Only py3 needs the extra decoding step. A value that is already a
        # string has no usable ``decode`` and is left untouched.
        if not PY2 and callable(getattr(content, "decode", None)):
            content = content.decode('utf-8')

        return content.splitlines()
Exemple #3
0
    def get(self, url, instance, service_check_tags, run_check=False):
        """Hit a given URL and return the parsed json.

        :param url: URL to fetch.
        :param instance: instance mapping; ``user``/``password`` enable basic
            auth and ``timeout`` overrides ``self.TIMEOUT``.
        :param service_check_tags: tags attached to the service check.
        :param run_check: when true, an OK service check is submitted after a
            successful request.
        :return: the decoded JSON body.
        :raises Exception: any request error is re-raised after a CRITICAL
            service check has been submitted.
        """
        self.log.debug('Fetching CouchDB stats at url: %s' % url)

        auth = None
        if 'user' in instance and 'password' in instance:
            auth = (instance['user'], instance['password'])

        # Override Accept request header so that failures are not redirected to the Futon web-ui
        request_headers = headers(self.agentConfig)
        request_headers['Accept'] = 'text/json'

        try:
            r = requests.get(url, auth=auth, headers=request_headers,
                             timeout=int(instance.get('timeout', self.TIMEOUT)))
            r.raise_for_status()
            if run_check:
                self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.OK,
                                   tags=service_check_tags,
                                   message='Connection to %s was successful' % url)
        except requests.exceptions.Timeout as e:
            self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL,
                               tags=service_check_tags, message="Request timeout: {0}, {1}".format(url, e))
            raise
        except Exception as e:
            # HTTP errors are reported here as well. Use ``str(e)``:
            # exceptions have no ``message`` attribute on Python 3, and
            # reading it would raise AttributeError and mask the real error.
            self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL,
                               tags=service_check_tags, message=str(e))
            raise
        return r.json()
    def _process_status(self, status_url, auth, tags, http_host, timeout,
                        disable_ssl_validation, use_fastcgi):
        """Fetch the FPM status payload and submit its metrics.

        The payload comes from ``self.request_fastcgi`` when ``use_fastcgi``
        is set, otherwise from an HTTP GET that is retried on 503 responses.
        Every key of ``self.GAUGES`` and ``self.MONOTONIC_COUNTS`` found in
        the payload is submitted; missing keys are logged and skipped.

        :return: the pool name from the payload (``'default'`` if absent), so
            the caller can tag the service check with it.
        :raises Exception: any fetch or decode error is logged and re-raised.
        """
        payload = {}
        try:
            if use_fastcgi:
                raw_status = self.request_fastcgi(status_url, query='json')
                payload = json.loads(raw_status)
            else:
                # TODO: adding the 'full' parameter gets you per-process detailed
                # informations, which could be nice to parse and output as metrics
                attempts = 3
                final_attempt = attempts - 1
                for attempt in range(attempts):
                    reply = requests.get(
                        status_url,
                        auth=auth,
                        timeout=timeout,
                        headers=headers(self.agentConfig, http_host=http_host),
                        verify=not disable_ssl_validation,
                        params={'json': True},
                    )

                    # Exponential backoff on 503: sleep 2^attempt seconds plus a
                    # random fraction of a second, except after the last attempt.
                    # 503s originated here: https://github.com/php/php-src/blob/d84ef96/sapi/fpm/fpm/fpm_status.c#L96
                    if reply.status_code == 503 and attempt < final_attempt:
                        time.sleep(2**attempt + random.random())
                        continue

                    reply.raise_for_status()
                    payload = reply.json()

                    # Got a usable response, leave the retry loop.
                    break
        except Exception as err:
            self.log.error("Failed to get metrics from {}: {}".format(
                status_url, err))
            raise

        pool_name = payload.get('pool', 'default')
        metric_tags = tags + ["pool:{0}".format(pool_name)]
        if http_host is not None:
            metric_tags.append("http_host:{0}".format(http_host))

        for field, metric_name in iteritems(self.GAUGES):
            if field in payload:
                self.gauge(metric_name, int(payload[field]), tags=metric_tags)
            else:
                self.log.warn(
                    "Gauge metric {0} is missing from FPM status".format(field))

        for field, metric_name in iteritems(self.MONOTONIC_COUNTS):
            if field in payload:
                self.monotonic_count(metric_name, int(payload[field]),
                                     tags=metric_tags)
            else:
                self.log.warn(
                    "Counter metric {0} is missing from FPM status".format(
                        field))

        return pool_name
Exemple #5
0
    def _process_ping(self, ping_url, ping_reply, auth, tags, pool_name,
                      http_host, timeout, disable_ssl_validation):
        """Ping the FPM pool and submit the matching service check.

        The check is OK when the request succeeds and the expected reply
        (``'pong'`` when ``ping_reply`` is None) is contained in the response
        text. Any failure is logged and reported as CRITICAL; it is not
        re-raised.
        """
        expected_reply = 'pong' if ping_reply is None else ping_reply

        sc_tags = ["ping_url:{0}".format(ping_url)] + tags
        if http_host is not None:
            sc_tags.append("http_host:{0}".format(http_host))

        try:
            reply = requests.get(
                ping_url,
                auth=auth,
                timeout=timeout,
                headers=headers(self.agentConfig, http_host=http_host),
                verify=not disable_ssl_validation,
            )
            reply.raise_for_status()

            if expected_reply not in reply.text:
                raise Exception("Received unexpected reply to ping: {}".format(
                    reply.text))
        except Exception as err:
            self.log.error("Failed to ping FPM pool {} on URL {}: {}".format(
                pool_name, ping_url, err))
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.CRITICAL,
                               tags=sc_tags,
                               message=str(err))
            return

        self.service_check(self.SERVICE_CHECK_NAME,
                           AgentCheck.OK,
                           tags=sc_tags)
Exemple #6
0
 def _perform_request(self, instance, url, ssl_validation, auth):
     """GET ``url`` and return the response, raising on a 4xx/5xx status.

     The request uses the agent headers, the default integration HTTP
     timeout and the proxies returned by ``self.get_instance_proxy``.
     """
     request_options = {
         'auth': auth,
         'headers': headers(self.agentConfig),
         'verify': ssl_validation,
         'timeout': self.default_integration_http_timeout,
         'proxies': self.get_instance_proxy(instance, url),
     }
     response = requests.get(url, **request_options)
     response.raise_for_status()
     return response
Exemple #7
0
    def _get_data(self, url, config, send_sc=True):
        """
        Hit a given URL and return the parsed json.

        :param url: URL to fetch.
        :param config: object exposing ``username``, ``password``,
            ``ssl_verify``, ``ssl_cert``, ``ssl_key``, ``timeout`` and
            ``service_check_tags``.
        :param send_sc: when true, a CRITICAL connect service check is
            submitted before a request failure is re-raised.
        :return: the decoded JSON body.
        :raises AuthenticationError: when the server answers with HTTP 400.
        :raises Exception: any other request error is re-raised.
        """
        # Load basic authentication configuration, if available.
        if config.username and config.password:
            auth = (config.username, config.password)
        else:
            auth = None

        # Load SSL configuration, if available.
        # ssl_verify can be a bool or a string
        # (http://docs.python-requests.org/en/latest/user/advanced/#ssl-cert-verification)
        if isinstance(config.ssl_verify, (bool, str)):
            verify = config.ssl_verify
        else:
            verify = None
        if config.ssl_cert and config.ssl_key:
            cert = (config.ssl_cert, config.ssl_key)
        elif config.ssl_cert:
            cert = config.ssl_cert
        else:
            cert = None

        resp = None
        try:
            resp = requests.get(
                url,
                timeout=config.timeout,
                headers=headers(self.agentConfig),
                auth=auth,
                verify=verify,
                cert=cert
            )
            resp.raise_for_status()
        except Exception as e:
            # this means we've hit a particular kind of auth error that means the config is broken
            # Compare against None explicitly: a requests Response is falsy
            # for 4xx/5xx statuses, so ``if resp and ...`` could never match
            # a 400 response.
            if resp is not None and resp.status_code == 400:
                raise AuthenticationError("The ElasticSearch credentials are incorrect")

            if send_sc:
                self.service_check(
                    self.SERVICE_CHECK_CONNECT_NAME,
                    AgentCheck.CRITICAL,
                    message="Error {0} when hitting {1}".format(e, url),
                    tags=config.service_check_tags
                )
            raise

        self.log.debug("request to url {0} returned: {1}".format(url, resp))

        return resp.json()
    def _get_stats(self, url, instance):
        """Fetch ``url`` and return the decoded JSON body.

        Basic auth is used when the instance defines both ``user`` and
        ``password``; the timeout comes from the instance ``timeout`` value or
        ``DEFAULT_TIMEOUT``. A 4xx/5xx response raises via
        ``raise_for_status``.
        """
        self.log.debug('Fetching Couchbase stats at url: %s' % url)

        request_timeout = float(instance.get('timeout', DEFAULT_TIMEOUT))

        has_credentials = 'user' in instance and 'password' in instance
        credentials = (instance['user'], instance['password']) if has_credentials else None

        response = requests.get(
            url,
            auth=credentials,
            headers=headers(self.agentConfig),
            timeout=request_timeout,
        )
        response.raise_for_status()
        return response.json()
Exemple #9
0
    def check(self, instance):
        """Collect Fluentd plugin gauges from the monitor agent endpoint.

        Every metric named in ``self.GAUGES`` that a plugin reports is
        submitted as ``fluentd.<metric>``, tagged with ``tag_by`` and the
        instance tags. When ``plugin_ids`` is non-empty only those plugins are
        reported. A service check is submitted with the outcome.

        :param instance: instance mapping; must contain ``monitor_agent_url``
            and may contain ``plugin_ids``, ``tags``, ``tag_by`` and
            ``timeout``.
        :raises Exception: if ``monitor_agent_url`` is missing; request and
            parsing errors are re-raised after a CRITICAL service check.
        """
        if 'monitor_agent_url' not in instance:
            raise Exception(
                'Fluentd instance missing "monitor_agent_url" value.')

        # Resolve everything the failure handler needs before entering the
        # try block. Otherwise an early error (bad port, bad tags value) would
        # reach the handler with ``service_check_tags`` unbound and be masked
        # by an UnboundLocalError.
        url = instance.get('monitor_agent_url')
        plugin_ids = instance.get('plugin_ids', [])
        custom_tags = instance.get('tags', [])

        # Fallback  with `tag_by: plugin_id`
        tag_by = instance.get('tag_by')
        if tag_by not in self._AVAILABLE_TAGS:
            tag_by = 'plugin_id'

        parsed_url = urlparse.urlparse(url)
        monitor_agent_host = parsed_url.hostname
        monitor_agent_port = parsed_url.port or 24220
        service_check_tags = [
            'fluentd_host:%s' % monitor_agent_host,
            'fluentd_port:%s' % monitor_agent_port
        ] + custom_tags

        try:
            timeout = float(instance.get('timeout', self.default_timeout))

            r = requests.get(url,
                             headers=headers(self.agentConfig),
                             timeout=timeout)
            r.raise_for_status()
            status = r.json()

            for p in status['plugins']:
                # Filter unspecified plugins to keep backward compatibility.
                if plugin_ids and p.get('plugin_id') not in plugin_ids:
                    continue
                tag = "%s:%s" % (tag_by, p.get(tag_by))
                for m in self.GAUGES:
                    value = p.get(m)
                    if value is None:
                        continue
                    self.gauge('fluentd.%s' % (m), value, [tag] + custom_tags)
        except Exception as e:
            msg = "No stats could be retrieved from %s : %s" % (url, str(e))
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.CRITICAL,
                               tags=service_check_tags,
                               message=msg)
            raise
        else:
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.OK,
                               tags=service_check_tags)
Exemple #10
0
    def _perform_request(self, url, path, ssl_params, timeout):
        certificate = None
        if 'ssl_certfile' in ssl_params and 'ssl_keyfile' in ssl_params:
            certificate = (ssl_params['ssl_certfile'],
                           ssl_params['ssl_keyfile'])

        verify = ssl_params.get(
            'ssl_ca_certs',
            True) if ssl_params['ssl_cert_validation'] else False

        return requests.get(url + path,
                            verify=verify,
                            cert=certificate,
                            timeout=timeout,
                            headers=headers(self.agentConfig))
Exemple #11
0
    def _fetch_url_data(self, url, username, password, verify, custom_headers):
        """Hit the HAProxy stats URL and return the stats lines.

        :param url: base URL; ``STATS_URL`` is appended to it.
        :param username: user name for HTTP basic auth.
        :param password: password for HTTP basic auth.
        :param verify: passed to ``requests.get`` as ``verify``.
        :param custom_headers: extra request headers; the agent headers are
            merged on top of them. ``None`` is treated as no extra headers.
        :return: ``response.content`` split into lines.
        :raises requests.exceptions.HTTPError: on a 4xx/5xx response.
        """
        auth = (username, password)
        url = "%s%s" % (url, STATS_URL)

        # Merge into a copy: updating ``custom_headers`` in place would leak
        # the agent headers into the caller's dict on every call.
        request_headers = dict(custom_headers or {})
        request_headers.update(headers(self.agentConfig))

        self.log.debug("Fetching haproxy stats from url: %s", url)

        response = requests.get(url,
                                auth=auth,
                                headers=request_headers,
                                verify=verify,
                                timeout=self.default_integration_http_timeout)
        response.raise_for_status()

        return response.content.splitlines()
Exemple #12
0
    def get(self, url, service_check_tags, run_check=False):
        """Fetch ``url`` through ``self.http`` and return the decoded JSON.

        On failure a CRITICAL service check is submitted and the error is
        re-raised. On success an OK service check is submitted only when
        ``run_check`` is true.
        """
        self.log.debug('Fetching CouchDB stats at url: %s' % url)

        # Override Accept request header so that failures are not redirected to the Futon web-ui
        request_headers = headers(self.agentConfig)
        request_headers['Accept'] = 'text/json'

        def report_critical(message):
            # Single place that submits the failed-connection service check.
            self.service_check(
                self.SERVICE_CHECK_NAME,
                AgentCheck.CRITICAL,
                tags=service_check_tags,
                message=message,
            )

        try:
            r = self.http.get(url, headers=request_headers)
            r.raise_for_status()
            if run_check:
                self.service_check(
                    self.SERVICE_CHECK_NAME,
                    AgentCheck.OK,
                    tags=service_check_tags,
                    message='Connection to %s was successful' % url,
                )
        except requests.exceptions.Timeout as err:
            report_critical("Request timeout: {0}, {1}".format(url, err))
            raise
        except Exception as err:
            # HTTP errors and every other failure share the same report.
            report_critical(str(err))
            raise
        return r.json()
Exemple #13
0
    def _check_connectivity_to_master(self, instance, tags):
        url = instance.get('gitlab_url')
        if url is None:
            # Simply ignore this service check if not configured
            return

        parsed_url = urlparse(url)
        gitlab_host = parsed_url.hostname
        gitlab_port = 443 if parsed_url.scheme == 'https' else (parsed_url.port
                                                                or 80)
        service_check_tags = [
            'gitlab_host:{}'.format(gitlab_host),
            'gitlab_port:{}'.format(gitlab_port)
        ]
        service_check_tags.extend(tags)

        # Load the ssl configuration
        ssl_cert_validation = _is_affirmative(
            instance.get('ssl_cert_validation', True))
        ssl_ca_certs = instance.get('ssl_ca_certs', True)

        verify_ssl = ssl_ca_certs if ssl_cert_validation else False

        # Timeout settings
        timeouts = (
            int(
                instance.get('connect_timeout',
                             GitlabRunnerCheck.DEFAULT_CONNECT_TIMEOUT)),
            int(
                instance.get('receive_timeout',
                             GitlabRunnerCheck.DEFAULT_RECEIVE_TIMEOUT)),
        )

        # Auth settings
        auth = None
        if 'gitlab_user' in instance and 'gitlab_password' in instance:
            auth = (instance['gitlab_user'], instance['gitlab_password'])

        try:
            self.log.debug("checking connectivity against {}".format(url))
            r = requests.get(url,
                             auth=auth,
                             verify=verify_ssl,
                             timeout=timeouts,
                             headers=headers(self.agentConfig))
            if r.status_code != 200:
                self.service_check(
                    self.MASTER_SERVICE_CHECK_NAME,
                    OpenMetricsBaseCheck.CRITICAL,
                    message="Got {} when hitting {}".format(
                        r.status_code, url),
                    tags=service_check_tags,
                )
                raise Exception("Http status code {} on url {}".format(
                    r.status_code, url))
            else:
                r.raise_for_status()

        except requests.exceptions.Timeout:
            # If there's a timeout
            self.service_check(
                self.MASTER_SERVICE_CHECK_NAME,
                OpenMetricsBaseCheck.CRITICAL,
                message="Timeout when hitting {}".format(url),
                tags=service_check_tags,
            )
            raise
        except Exception as e:
            self.service_check(
                self.MASTER_SERVICE_CHECK_NAME,
                OpenMetricsBaseCheck.CRITICAL,
                message="Error hitting {}. Error: {}".format(url, e),
                tags=service_check_tags,
            )
            raise
        else:
            self.service_check(self.MASTER_SERVICE_CHECK_NAME,
                               OpenMetricsBaseCheck.OK,
                               tags=service_check_tags)
        self.log.debug("gitlab check succeeded")
Exemple #14
0
    def check(self, instance):
        """Collect metrics from a Lighttpd status page.

        A service check reports whether the status page could be fetched.
        Each ``name: value`` line of the body with a numeric value is
        submitted as a gauge, rate and/or counter according to
        ``self.GAUGES``, ``self.RATES`` and ``self.COUNTERS``. When nothing
        was submitted, the version-specific URL suffix is appended once and
        the check is retried; otherwise an exception is raised.

        :param instance: instance mapping; must contain
            ``lighttpd_status_url`` and may contain ``tags``, ``auth_type``,
            ``user`` and ``password``.
        :raises Exception: on a missing URL, an unsupported ``auth_type``, a
            failed request, or when no metric could be extracted.
        """
        if 'lighttpd_status_url' not in instance:
            raise Exception(
                "Missing 'lighttpd_status_url' variable in Lighttpd config")

        configured_url = instance['lighttpd_status_url']
        # A previous run may have recorded a corrected URL for this instance.
        url = self.assumed_url.get(configured_url, configured_url)

        tags = instance.get('tags', [])

        auth_type = instance.get('auth_type', 'basic').lower()
        if auth_type not in ('basic', 'digest'):
            msg = "Unsupported value of 'auth_type' variable in Lighttpd config: {}".format(
                auth_type)
            raise Exception(msg)

        auth = None
        if 'user' in instance and 'password' in instance:
            if auth_type == 'basic':
                auth = (instance['user'], instance['password'])
            else:
                auth = requests.auth.HTTPDigestAuth(instance['user'],
                                                    instance['password'])

        self.log.debug("Connecting to %s" % url)

        # Submit a service check for status page availability.
        location = urlparse(url)
        service_check_tags = [
            'host:%s' % location.hostname,
            'port:%s' % (location.port or 80)
        ] + tags
        try:
            r = requests.get(url, auth=auth, headers=headers(self.agentConfig))
            r.raise_for_status()
        except Exception:
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.CRITICAL,
                               tags=service_check_tags)
            raise
        self.service_check(self.SERVICE_CHECK_NAME,
                           AgentCheck.OK,
                           tags=service_check_tags)

        server_version = self._get_server_version(r.headers)
        body = r.content

        # Each table maps a status-page key to a metric name and is paired
        # with the submission method used for that kind of metric.
        submitters = (
            (self.GAUGES, self.gauge),
            (self.RATES, self.rate),
            (self.COUNTERS, self.increment),
        )

        metric_count = 0
        # Loop through and extract the numerical values
        for raw_line in body.split(b'\n'):
            fields = raw_line.split(b': ')
            if len(fields) != 2:
                continue
            metric, raw_value = fields
            try:
                value = float(raw_value)
            except ValueError:
                continue

            # Special case: kBytes => bytes
            if metric == b'Total kBytes':
                value = value * 1024

            for table, submit in submitters:
                if metric in table:
                    metric_count += 1
                    submit(table[metric], value, tags=tags)

        if metric_count != 0:
            return

        url_suffix = self.URL_SUFFIX_PER_VERSION[server_version]
        already_retried = self.assumed_url.get(configured_url) is not None
        if already_retried or url[-len(url_suffix):] == url_suffix:
            raise Exception(
                "No metrics were fetched for this instance. Make sure "
                "that %s is the proper url." %
                instance['lighttpd_status_url'])

        self.assumed_url[configured_url] = '%s%s' % (url, url_suffix)
        self.warning(
            "Assuming url was not correct. Trying to add %s suffix to the url"
            % url_suffix)
        self.check(instance)
Exemple #15
0
    def _check_health_endpoint(self, instance, check_type, tags):
        if check_type not in self.ALLOWED_SERVICE_CHECKS:
            raise CheckException(
                "Health endpoint {} is not a valid endpoint".format(
                    check_type))

        url = instance.get('gitlab_url')

        if url is None:
            # Simply ignore this service check if not configured
            self.log.debug(
                "gitlab_url not configured, service check {} skipped".format(
                    check_type))
            return

        service_check_tags = self._service_check_tags(url)
        service_check_tags.extend(tags)
        verify_ssl = self._verify_ssl(instance)

        # Timeout settings
        timeouts = (
            int(
                instance.get('connect_timeout',
                             GitlabCheck.DEFAULT_CONNECT_TIMEOUT)),
            int(
                instance.get('receive_timeout',
                             GitlabCheck.DEFAULT_RECEIVE_TIMEOUT)),
        )

        # Auth settings
        auth = None
        if 'gitlab_user' in instance and 'gitlab_password' in instance:
            auth = (instance['gitlab_user'], instance['gitlab_password'])

        # These define which endpoint is hit and which type of check is actually performed
        # TODO: parse errors and report for single sub-service failure?
        service_check_name = 'gitlab.{}'.format(check_type)
        check_url = '{}/-/{}'.format(url, check_type)

        try:
            self.log.debug("checking {} against {}".format(
                check_type, check_url))
            r = requests.get(check_url,
                             auth=auth,
                             verify=verify_ssl,
                             timeout=timeouts,
                             headers=headers(self.agentConfig))
            if r.status_code != 200:
                self.service_check(
                    service_check_name,
                    OpenMetricsBaseCheck.CRITICAL,
                    message="Got {} when hitting {}".format(
                        r.status_code, check_url),
                    tags=service_check_tags,
                )
                raise Exception("Http status code {} on check_url {}".format(
                    r.status_code, check_url))
            else:
                r.raise_for_status()

        except requests.exceptions.Timeout:
            # If there's a timeout
            self.service_check(
                service_check_name,
                OpenMetricsBaseCheck.CRITICAL,
                message="Timeout when hitting {}".format(check_url),
                tags=service_check_tags,
            )
            raise
        except Exception as e:
            self.service_check(
                service_check_name,
                OpenMetricsBaseCheck.CRITICAL,
                message="Error hitting {}. Error: {}".format(check_url, e),
                tags=service_check_tags,
            )
            raise
        else:
            self.service_check(service_check_name,
                               OpenMetricsBaseCheck.OK,
                               tags=service_check_tags)
        self.log.debug("gitlab check {} succeeded".format(check_type))
Exemple #16
0
    def check(self, instance):
        """Collect Fluentd plugin gauges from the monitor agent endpoint.

        Every metric named in ``self.GAUGES`` that a plugin reports is
        submitted as ``fluentd.<metric>``, tagged with ``tag_by`` and the
        instance tags. When ``plugin_ids`` is non-empty only those plugins are
        reported. A service check is submitted with the outcome.

        :param instance: instance mapping; must contain ``monitor_agent_url``
            and may contain ``plugin_ids``, ``tags`` and ``tag_by``.
        :raises Exception: if ``monitor_agent_url`` is missing; request and
            parsing errors are re-raised after a CRITICAL service check.
        """
        if 'monitor_agent_url' not in instance:
            raise Exception(
                'Fluentd instance missing "monitor_agent_url" value.')

        # Resolve everything the failure handler needs before entering the
        # try block. Otherwise an early error (bad port, bad tags value) would
        # reach the handler with ``service_check_tags`` unbound and be masked
        # by an UnboundLocalError.
        url = instance.get('monitor_agent_url')
        plugin_ids = instance.get('plugin_ids', [])
        custom_tags = instance.get('tags', [])

        # Fallback  with `tag_by: plugin_id`
        tag_by = instance.get('tag_by')
        if tag_by not in self._AVAILABLE_TAGS:
            tag_by = 'plugin_id'

        parsed_url = urlparse(url)
        monitor_agent_host = parsed_url.hostname
        monitor_agent_port = parsed_url.port or 24220
        service_check_tags = [
            'fluentd_host:%s' % monitor_agent_host,
            'fluentd_port:%s' % monitor_agent_port,
        ] + custom_tags

        try:
            self.HTTP_CONFIG_REMAPPER = {
                'headers': {
                    'name': 'headers',
                    'default': headers(self.agentConfig)
                },
                'timeout': {
                    'name': 'timeout',
                    'default': self.default_timeout
                },
            }

            r = self.http.get(url)
            r.raise_for_status()
            status = r.json()

            for p in status['plugins']:
                # Filter unspecified plugins to keep backward compatibility.
                if plugin_ids and p.get('plugin_id') not in plugin_ids:
                    continue
                tag = "%s:%s" % (tag_by, p.get(tag_by))
                for m in self.GAUGES:
                    metric = p.get(m)
                    if metric is None:
                        continue
                    if m == 'retry_count':
                        # Since v1, retry_count counts the total number of errors.
                        # Use retry/steps field for temporal retry count instead.
                        rs = p.get("retry")
                        if rs is not None:
                            steps = rs.get("steps")
                            metric = steps if steps is not None else 0
                    self.gauge('fluentd.%s' % (m), metric,
                               [tag] + custom_tags)
        except Exception as e:
            msg = "No stats could be retrieved from %s : %s" % (url, str(e))
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.CRITICAL,
                               tags=service_check_tags,
                               message=msg)
            raise
        else:
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.OK,
                               tags=service_check_tags)