Example #1
class PrometheusService(TunneledPlugin):
    """Tunneled Prometheus client; resolves host, port and auth for compose and k8s setups."""
    def __init__(self, host):
        super().__init__(host)
        self.is_k8s = helpers.is_k8s(self._host.SshDirect)
        self.DNS_NAME = host.ip if not self.is_k8s \
            else prometheus_connection_config['host']['k8s']
        self.PORT = prometheus_connection_config['port']['compose'] if not self.is_k8s \
            else prometheus_connection_config['port']['k8s']
        self.start_tunnel(self.DNS_NAME, self.PORT)
        self.url = f'{prometheus_connection_config["url"]["compose"]}:{self.local_bind_port}' if not self.is_k8s \
            else f'{prometheus_connection_config["url"]["k8s"]}'
        if self.is_k8s:
            with open('/etc/hosts', 'r+') as f:
                content = f.read()
                if f"{host.ip} {self.DNS_NAME}" not in content:
                    logging.info(
                        f"write new line in hosts file: {host.ip} {self.DNS_NAME}"
                    )
                    f.write(f'\n{host.ip} {self.DNS_NAME}\n')
        self.headers = None if not self.is_k8s \
            else {'Authorization': f'Basic {prometheus_connection_config["auth"]}'}
        self._prom = Prometheus(url=self.url, headers=self.headers)

    def query(self, query):
        return json.loads(self._prom.query(metric=query))

    def ping(self):
        self.query(query='prometheus_engine_queries')
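
A minimal usage sketch for the service above; `host` is assumed to come from the surrounding test framework and to expose `.ip` and `.SshDirect` as the plugin expects:

# Hypothetical usage; `host` is a fixture from the surrounding test framework.
prometheus = PrometheusService(host)
prometheus.ping()  # raises if Prometheus is unreachable
metrics = prometheus.query('prometheus_engine_queries')  # parsed JSON dict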
Example #2
class PrometheusBackend(MetricBackend):
    """Backend for querying metrics from Prometheus."""
    def __init__(self, **kwargs):
        self.client = kwargs.get('client')
        if not self.client:
            self.client = Prometheus(**kwargs)

    def query(self, window, filter):
        metric = filter.format(window=f'{window}s')  # add resolution to filter
        timeseries = self.client.query(metric=metric)
        LOGGER.debug(pprint.pformat(timeseries))
        return timeseries

    def count(self, timeseries):
        """Count events in timeseries.

        Args:
            timeseries (dict): Timeseries results.

        Returns:
            int: Event count.
        """
        try:
            return float(timeseries["data"]["result"][0]["value"][1])
        except (IndexError, KeyError) as exception:
            LOGGER.debug(exception)
            return 0  # no events in timeseries
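
For reference, a sketch of the instant-query response shape that `count` above walks; the metric name, timestamp and value are invented:

# Shape only; all values below are invented for illustration.
timeseries = {
    "status": "success",
    "data": {
        "resultType": "vector",
        "result": [
            {"metric": {"__name__": "http_requests_total", "job": "api"},
             "value": [1614556800.0, "42"]},
        ],
    },
}
# count(timeseries) -> 42.0; an empty "result" list falls back to 0.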
Example #3
def get_metrics(metric):
    """Query Prometheus and return the first result value if it is numeric, else False."""
    prometheus = Prometheus()
    query_results = prometheus.query(metric=metric)
    query_dict = json.loads(query_results)
    success_percent = query_dict['data']['result'][0]['value'][1]
    if success_percent.replace('.', '', 1).isdigit():
        return success_percent
    else:
        return False
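
A usage sketch for the helper above; the recording-rule name is invented:

# Hypothetical call; 'job:request_success:ratio' is an invented recording rule.
value = get_metrics('job:request_success:ratio')
if value is not False:
    print(f'success ratio: {float(value):.2%}')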
Example #4
class PrometheusBackend:
    """Backend for querying metrics from Prometheus."""
    def __init__(self, client=None, url=None, headers=None):
        self.client = client
        if not self.client:
            if url:
                os.environ['PROMETHEUS_URL'] = url
            if headers:
                os.environ['PROMETHEUS_HEAD'] = json.dumps(headers)
            self.client = Prometheus()

    def query_sli(self, timestamp, window, slo_config):
        """Query SLI value from a given PromQL expression.

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window (in seconds).
            slo_config (dict): SLO configuration.

        Returns:
            float: SLI value.
        """
        conf = slo_config['backend']
        measurement = conf['measurement']
        expr = measurement['expression']
        response = self.query(expr, window, timestamp, operators=[])
        sli_value = PrometheusBackend.count(response)
        LOGGER.debug(f"SLI value: {sli_value}")
        return sli_value

    def good_bad_ratio(self, timestamp, window, slo_config):
        """Compute good bad ratio from two metric filters.

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window (in seconds).
            slo_config (dict): SLO configuration.

        Note:
            At least one of `filter_bad` or `filter_valid` is required.

        Returns:
            tuple: A tuple of (good_count, bad_count).
        """
        conf = slo_config['backend']
        good = conf['measurement']['filter_good']
        bad = conf['measurement'].get('filter_bad')
        valid = conf['measurement'].get('filter_valid')
        operators = conf['measurement'].get('operators', ['increase', 'sum'])

        # Replace window by its value in the error budget policy step
        res = self.query(good, window, timestamp, operators)
        good_count = PrometheusBackend.count(res)

        if bad:
            res = self.query(bad, window, timestamp, operators)
            bad_count = PrometheusBackend.count(res)
        elif valid:
            res = self.query(valid, window, timestamp, operators)
            valid_count = PrometheusBackend.count(res)
            bad_count = valid_count - good_count
        else:
            raise Exception("`filter_bad` or `filter_valid` is required.")

        LOGGER.debug(f'Good events: {good_count} | '
                     f'Bad events: {bad_count}')

        return (good_count, bad_count)

    # pylint: disable=unused-argument
    def distribution_cut(self, timestamp, window, slo_config):
        """Query events for distributions (histograms).

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window (in seconds).
            slo_config (dict): SLO configuration.

        Returns:
            tuple: A tuple of (good_count, bad_count).
        """
        conf = slo_config['backend']
        measurement = conf['measurement']
        expr = measurement['expression']
        threshold_bucket = measurement['threshold_bucket']
        labels = {"le": threshold_bucket}
        res_good = self.query(expr,
                              window,
                              operators=['increase', 'sum'],
                              labels=labels)
        good_count = PrometheusBackend.count(res_good)

        # We use the _count metric to figure out the 'valid count'.
        # Trying to get the valid count from the _bucket metric query is hard
        # due to Prometheus 'le' syntax that doesn't have the alternative 'ge'
        # See https://github.com/prometheus/prometheus/issues/2018.
        expr_count = expr.replace('_bucket', '_count')
        res_valid = self.query(expr_count,
                               window,
                               operators=['increase', 'sum'])
        valid_count = PrometheusBackend.count(res_valid)
        bad_count = valid_count - good_count
        LOGGER.debug(f'Good events: {good_count} | '
                     f'Bad events: {bad_count}')
        return (good_count, bad_count)

    # pylint: disable=unused-argument
    def query(self, filter, window, timestamp=None, operators=[], labels={}):
        """Query Prometheus server.

        Args:
            filter (str): Query filter.
            window (int): Window (in seconds).
            timestamp (int): UNIX timestamp.
            operators (list): List of PromQL operators to apply on query.
            labels (dict): Labels dict to add to existing query.

        Returns:
            dict: Response.
        """
        filter = PrometheusBackend._fmt_query(filter, window, operators,
                                              labels)
        LOGGER.debug(f'Query: {filter}')
        response = self.client.query(metric=filter)
        response = json.loads(response)
        LOGGER.debug(pprint.pformat(response))
        return response

    @staticmethod
    def count(response):
        """Count events in Prometheus response.
        Args:
            response (dict): Prometheus query response.
        Returns:
            int: Event count.
        """
        # Note: this function could be replaced by using the `count_over_time`
        # function that Prometheus provides.
        try:
            return float(response['data']['result'][0]['value'][1])
        except (IndexError, KeyError) as exception:
            LOGGER.warning("Couldn't find any values in timeseries response.")
            LOGGER.debug(exception, exc_info=True)
            return NO_DATA  # no events in timeseries

    @staticmethod
    def _fmt_query(query, window, operators=[], labels={}):
        """Format Prometheus query:

        * If the PromQL expression has a `window` placeholder, replace it by the
        current window. Otherwise, append it to the expression.

        * If operators are defined, apply them to the metric in sequential
        order.

        * If labels are defined, append them to existing labels.

        Args:
            query (str): Original query in YAML config.
            window (int): Query window (in seconds).
            operators (list): Operators to wrap query with.
            labels (dict): Labels dict to add to existing query.

        Returns:
            str: Formatted query.
        """
        query = query.strip()
        if '[window' in query:
            query = query.replace('[window', f'[{window}s')
        else:
            query += f'[{window}s]'
        for operator in operators:
            query = f'{operator}({query})'
        for key, value in labels.items():
            query = query.replace('}', f', {key}="{value}"}}')
        return query
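
A worked example of `_fmt_query` from the class above; the metric name, window and label are invented:

# Step-by-step for an invented metric, window=3600, two operators and one extra label:
#   'http_requests_total{code="200"}'
#   -> 'http_requests_total{code="200"}[3600s]'                            (window appended)
#   -> 'sum(increase(http_requests_total{code="200"}[3600s]))'             (operators, in order)
#   -> 'sum(increase(http_requests_total{code="200", job="api"}[3600s]))'  (label spliced in)
query = PrometheusBackend._fmt_query('http_requests_total{code="200"}',
                                     3600,
                                     operators=['increase', 'sum'],
                                     labels={'job': 'api'})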
Example #5
class PrometheusBackend(MetricBackend):
    """Backend for querying metrics from Prometheus."""
    def __init__(self, **kwargs):
        self.client = kwargs.pop('client', None)
        if not self.client:
            url = kwargs.get('url')
            headers = kwargs.get('headers')
            if url:
                os.environ['PROMETHEUS_URL'] = url
            if headers:
                os.environ['PROMETHEUS_HEAD'] = json.dumps(headers)
            LOGGER.debug(f'Prometheus URL: {url}')
            LOGGER.debug(f'Prometheus headers: {headers}')
            self.client = Prometheus()

    def query_sli(self, **kwargs):
        """Query SLI value from a given PromQL expression.

        Args:
            kwargs (dict):
                timestamp (int): Timestamp to query.
                window (int): Window to query (in seconds).
                measurement (dict):
                    expression (str): PromQL expression.

        Returns:
            float: SLI value.
        """
        window = kwargs['window']
        measurement = kwargs['measurement']
        expr = measurement['expression']
        expression = expr.replace("[window]", f"[{window}s]")
        data = self.query(expression)
        LOGGER.debug(
            f"Expression: {expression} | Result: {pprint.pformat(data)}")
        try:
            sli_value = float(data['data']['result'][0]['value'][1])
        except IndexError:
            sli_value = 0
        LOGGER.debug(f"SLI value: {sli_value}")
        return sli_value

    def good_bad_ratio(self, **kwargs):
        """Compute good bad ratio from two metric filters.

        Args:
            kwargs (dict):
                window (str): Query window.
                measurement (dict): Measurement config
                    filter_good (str): PromQL query for good events.
                    filter_bad (str, optional): PromQL query for bad events.
                    filter_valid (str, optional): PromQL query for valid events.

        Note:
            At least one of `filter_bad` or `filter_valid` is required.

        Returns:
            tuple: A tuple of (good_event_count, bad_event_count).
        """
        window = kwargs['window']
        filter_good = kwargs['measurement']['filter_good']
        filter_bad = kwargs['measurement'].get('filter_bad')
        filter_valid = kwargs['measurement'].get('filter_valid')

        # Replace window by its value in the error budget policy step
        expr_good = filter_good.replace('[window]', f'[{window}s]')
        res_good = self.query(expr_good)
        good_event_count = PrometheusBackend.count(res_good)

        if filter_bad:
            expr_bad = filter_bad.replace('[window]', f'[{window}s]')
            res_bad = self.query(expr_bad)
            bad_event_count = PrometheusBackend.count(res_bad)
        elif filter_valid:
            expr_valid = filter_valid.replace('[window]', f'[{window}s]')
            res_valid = self.query(expr_valid)
            bad_event_count = \
                PrometheusBackend.count(res_valid) - good_event_count
        else:
            raise Exception("`filter_bad` or `filter_valid` is required.")

        LOGGER.debug(f'Good events: {good_event_count} | '
                     f'Bad events: {bad_event_count}')

        return (good_event_count, bad_event_count)

    def query(self, filter):
        """Query Prometheus server.

        Args:
            filter (str): Query filter.

        Returns:
            dict: Response.
        """
        response = self.client.query(metric=filter)
        response = json.loads(response)
        LOGGER.debug(pprint.pformat(response))
        return response

    @staticmethod
    def count(response):
        """Count events in Prometheus response.

        Args:
            response (dict): Prometheus query response.

        Returns:
            int: Event count.
        """
        # Note: this function could be replaced by using the `count_over_time`
        # function that Prometheus provides.
        try:
            return len(response['data']['result'][0]['values'])
        except (IndexError, KeyError) as exception:
            LOGGER.warning("Couldn't find any values in timeseries response")
            LOGGER.debug(exception)
            return 0  # no events in timeseries
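
A usage sketch for this kwargs-based variant; the URL and PromQL filters are placeholders:

# Hypothetical configuration; URL and filters are invented.
backend = PrometheusBackend(client=None, url='http://localhost:9090')
good, bad = backend.good_bad_ratio(
    window=3600,
    measurement={
        'filter_good': 'http_requests_total{code=~"2.."}[window]',
        'filter_valid': 'http_requests_total[window]',
    })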
Example #6
class PrometheusBackend:
    """Backend for querying metrics from Prometheus."""
    def __init__(self, client=None, url=None, headers=None):
        self.client = client
        if not self.client:
            if url:
                os.environ['PROMETHEUS_URL'] = url
            if headers:
                os.environ['PROMETHEUS_HEAD'] = json.dumps(headers)
            LOGGER.debug(f'Prometheus URL: {url}')
            LOGGER.debug(f'Prometheus headers: {headers}')
            self.client = Prometheus()

    def query_sli(self, timestamp, window, slo_config):
        """Query SLI value from a given PromQL expression.

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window (in seconds).
            slo_config (dict): SLO configuration.

        Returns:
            float: SLI value.
        """
        conf = slo_config['backend']
        measurement = conf['measurement']
        expr = measurement['expression']
        expression = expr.replace("[window", f"[{window}s")
        data = self.query(expression, timestamp)
        LOGGER.debug(
            f"Expression: {expression} | Result: {pprint.pformat(data)}")
        try:
            sli_value = float(data['data']['result'][0]['value'][1])
        except IndexError:
            sli_value = 0
        LOGGER.debug(f"SLI value: {sli_value}")
        return sli_value

    def good_bad_ratio(self, timestamp, window, slo_config):
        """Compute good bad ratio from two metric filters.

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window (in seconds).
            slo_config (dict): SLO configuration.

        Note:
            At least one of `filter_bad` or `filter_valid` is required.

        Returns:
            tuple: A tuple of (good_count, bad_count).
        """
        conf = slo_config['backend']
        filter_good = conf['measurement']['filter_good']
        filter_bad = conf['measurement'].get('filter_bad')
        filter_valid = conf['measurement'].get('filter_valid')

        # Replace window by its value in the error budget policy step
        expr_good = filter_good.replace('[window', f'[{window}s')
        res_good = self.query(expr_good, timestamp)
        good_count = PrometheusBackend.count(res_good)

        if filter_bad:
            expr_bad = filter_bad.replace('[window', f'[{window}s')
            res_bad = self.query(expr_bad, timestamp)
            bad_count = PrometheusBackend.count(res_bad)
        elif filter_valid:
            expr_valid = filter_valid.replace('[window', f'[{window}s')
            res_valid = self.query(expr_valid, timestamp)
            bad_count = PrometheusBackend.count(res_valid) - good_count
        else:
            raise Exception("`filter_bad` or `filter_valid` is required.")

        LOGGER.debug(f'Good events: {good_count} | '
                     f'Bad events: {bad_count}')

        return (good_count, bad_count)

    def query(self, filter, timestamp=None):  # pylint: disable=unused-argument
        """Query Prometheus server.

        Args:
            filter (str): Query filter.
            timestamp (int): UNIX timestamp.

        Returns:
            dict: Response.
        """
        response = self.client.query(metric=filter)
        response = json.loads(response)
        LOGGER.debug(pprint.pformat(response))
        return response

    @staticmethod
    def count(response):
        """Count events in Prometheus response.

        Args:
            response (dict): Prometheus query response.

        Returns:
            int: Event count.
        """
        # Note: this function could be replaced by using the `count_over_time`
        # function that Prometheus provides.
        try:
            return len(response['data']['result'][0]['values'])
        except (IndexError, KeyError) as exception:
            LOGGER.warning("Couldn't find any values in timeseries response")
            LOGGER.debug(exception, exc_info=True)
            return 0  # no events in timeseries
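
Finally, a sketch of the `slo_config` shape the last variant reads; only the keys looked up by `good_bad_ratio` are shown and all values are invented:

# Hypothetical SLO configuration; values are placeholders.
slo_config = {
    'backend': {
        'measurement': {
            'filter_good': 'http_requests_total{code=~"2.."}[window]',
            'filter_valid': 'http_requests_total[window]',
        }
    }
}
backend = PrometheusBackend(url='http://localhost:9090')
good, bad = backend.good_bad_ratio(timestamp=1614556800, window=3600,
                                   slo_config=slo_config)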