def post(self, project_id):
    current_user_id = get_jwt_identity()
    current_user_roles = get_jwt_claims()['roles']

    project_memory_schema = MetricsSchema()
    project_cpu_data = request.get_json()

    validated_query_data, errors = project_memory_schema.load(project_cpu_data)

    if errors:
        return dict(status='fail', message=errors), 400

    project = Project.get_by_id(project_id)

    if not project:
        return dict(status='fail',
                    message=f'project {project_id} not found'), 404

    if not is_owner_or_admin(project, current_user_id, current_user_roles):
        return dict(status='fail', message='unauthorised'), 403

    # Get current time
    current_time = datetime.datetime.now()
    yesterday = current_time + datetime.timedelta(days=-1)

    namespace = project.alias
    prometheus = Prometheus()

    start = validated_query_data.get('start', yesterday.timestamp())
    end = validated_query_data.get('end', current_time.timestamp())
    step = validated_query_data.get('step', '1h')

    prom_data = prometheus.query_rang(
        start=start,
        end=end,
        step=step,
        metric='sum(rate(container_cpu_usage_seconds_total{container!="POD", image!="",namespace="' + namespace + '"}[5m]))')

    # Change the array of [timestamp, value] pairs into a list of JSON objects
    new_data = json.loads(prom_data)
    cpu_data_list = []

    try:
        for value in new_data["data"]["result"][0]["values"]:
            case = {'timestamp': value[0], 'value': value[1]}
            cpu_data_list.append(case)
    except (KeyError, IndexError):
        return dict(status='fail', message='No values found'), 404

    return dict(status='success', data=dict(values=cpu_data_list)), 200
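# A hypothetical request body for the CPU-metrics endpoint above, assuming
# MetricsSchema exposes optional 'start', 'end' and 'step' fields (all three
# fall back to defaults when omitted). This is an illustrative sketch, not
# taken from the original project.
example_cpu_request = {
    'start': 1570221677,   # UNIX timestamp; defaults to 24 hours ago
    'end': 1570225677,     # UNIX timestamp; defaults to now
    'step': '5m',          # range-query resolution; defaults to '1h'
}

# The endpoint would then respond with something shaped like:
# {'status': 'success',
#  'data': {'values': [{'timestamp': 1570221677, 'value': '0.0042'}, ...]}}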
def post(self, project_id, app_id):
    current_user_id = get_jwt_identity()
    current_user_roles = get_jwt_claims()['roles']

    project = Project.get_by_id(project_id)

    if not project:
        return dict(status='fail',
                    message=f'project {project_id} not found'), 404

    if not is_owner_or_admin(project, current_user_id, current_user_roles):
        return dict(status='fail', message='unauthorised'), 403

    # Check app from db
    app = App.get_by_id(app_id)

    if not app:
        return dict(status='fail',
                    message=f'app {app_id} not found'), 404

    namespace = project.alias
    app_alias = app.alias
    prometheus = Prometheus()

    try:
        prom_data = prometheus.query(
            metric='sum(kube_persistentvolumeclaim_resource_requests_storage_bytes{namespace="' + namespace + '", persistentvolumeclaim=~"' + app_alias + '.*"})')

        # Change array values to json
        new_data = json.loads(prom_data)
        values = new_data["data"]

        percentage_data = prometheus.query(
            metric='100*(kubelet_volume_stats_used_bytes{namespace="' + namespace + '", persistentvolumeclaim=~"' + app_alias + '.*"}/kubelet_volume_stats_capacity_bytes{namespace="' + namespace + '", persistentvolumeclaim=~"' + app_alias + '.*"})')

        data = json.loads(percentage_data)
        volume_perc_value = data["data"]
    except (KeyError, IndexError, ValueError):
        return dict(status='fail', message='No values found'), 404

    return dict(status='success',
                data=dict(storage_capacity=values,
                          storage_percentage_usage=volume_perc_value)), 200
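# The storage endpoint above passes the Prometheus "data" object through
# unmodified, so each of storage_capacity and storage_percentage_usage is a
# raw instant-query result. Based on the Prometheus HTTP API response format,
# it looks roughly like this (label and sample values are illustrative):
example_storage_capacity = {
    'resultType': 'vector',
    'result': [
        {'metric': {'persistentvolumeclaim': 'myapp-data'},
         'value': [1570225677, '1073741824']},   # [timestamp, bytes as string]
    ],
}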
def promQueries(startTime, stopTime, testDirPath):
    prom = Prometheus()

    cpu5s = json.loads(prom.query_rang(metric='sum(container_cpu_usage_seconds_total{namespace="default"}) by (pod_name)', start=startTime, end=stopTime, step='5s'))
    memWriteB5s = json.loads(prom.query_rang(metric='sum(container_fs_writes_bytes_total{namespace="default"}) by (pod_name)', start=startTime, end=stopTime, step='5s'))
    memReadB5s = json.loads(prom.query_rang(metric='sum(container_fs_reads_bytes_total{namespace="default"}) by (pod_name)', start=startTime, end=stopTime, step='5s'))
    netReadB5s = json.loads(prom.query_rang(metric='sum(container_network_receive_bytes_total{namespace="default"}) by (pod_name)', start=startTime, end=stopTime, step='5s'))
    netWriteB5s = json.loads(prom.query_rang(metric='sum(container_network_transmit_bytes_total{namespace="default"}) by (pod_name)', start=startTime, end=stopTime, step='5s'))

    """
    cpu5s = json.loads(prom.query_rang(metric='sum(container_cpu_usage_seconds_total{namespace="robot-shop"}) by (pod_name)', start=startTime, end=stopTime, step='5s'))
    memWriteB5s = json.loads(prom.query_rang(metric='sum(container_fs_writes_bytes_total{namespace="robot-shop"}) by (pod_name)', start=startTime, end=stopTime, step='5s'))
    memReadB5s = json.loads(prom.query_rang(metric='sum(container_fs_reads_bytes_total{namespace="robot-shop"}) by (pod_name)', start=startTime, end=stopTime, step='5s'))
    netReadB5s = json.loads(prom.query_rang(metric='sum(container_network_receive_bytes_total{namespace="robot-shop"}) by (pod_name)', start=startTime, end=stopTime, step='5s'))
    netWriteB5s = json.loads(prom.query_rang(metric='sum(container_network_transmit_bytes_total{namespace="robot-shop"}) by (pod_name)', start=startTime, end=stopTime, step='5s'))
    """

    # Can use queries below to find rate of change also
    """
    cpuAvg = json.loads(prom.query_rang(metric='sum(rate(container_cpu_usage_seconds_total{namespace="robot-shop"}[1m])) by (pod_name)', start=startTime, end=stopTime, step='5s'))
    memWriteBavg = json.loads(prom.query_rang(metric='sum(rate(container_fs_writes_bytes_total{namespace="robot-shop"}[1m])) by (pod_name)', start=startTime, end=stopTime, step='5s'))
    memReadBavg = json.loads(prom.query_rang(metric='sum(rate(container_fs_reads_bytes_total{namespace="robot-shop"}[1m])) by (pod_name)', start=startTime, end=stopTime, step='5s'))
    netReadBavg = json.loads(prom.query_rang(metric='sum(rate(container_network_receive_bytes_total{namespace="robot-shop"}[1m])) by (pod_name)', start=startTime, end=stopTime, step='5s'))
    netWriteBavg = json.loads(prom.query_rang(metric='sum(rate(container_network_transmit_bytes_total{namespace="robot-shop"}[1m])) by (pod_name)', start=startTime, end=stopTime, step='5s'))
    """

    podMetricsDict = {}   # Dict of podDataCollection objects keyed by pod name
    timestampList = []    # List of scraped timestamps
    podNameList = []      # List of scraped pods

    # Create list of podDataCollection objects, with CPU vals:
    for pod in cpu5s['data']['result']:
        p = podDataCollection(pod['metric']['pod_name'])
        podNameList.append(pod['metric']['pod_name'])
        p.cpu5s = pod['values']
        podMetricsDict[p.podName] = p
        for tStamp, val in pod['values']:
            timestampList.append(tStamp)

    for pod in memWriteB5s['data']['result']:
        podMetricsDict[pod['metric']['pod_name']].memW5s = pod['values']
    for pod in memReadB5s['data']['result']:
        podMetricsDict[pod['metric']['pod_name']].memR5s = pod['values']
    for pod in netWriteB5s['data']['result']:
        podMetricsDict[pod['metric']['pod_name']].netW5s = pod['values']
    for pod in netReadB5s['data']['result']:
        podMetricsDict[pod['metric']['pod_name']].netR5s = pod['values']

    createRawCSVs(timestampList, podNameList, testDirPath, podMetricsDict)
from prometheus_http_client import Prometheus

prometheus = Prometheus()
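# A minimal usage sketch for the client created above, assuming the
# PROMETHEUS_URL environment variable was exported before the Prometheus()
# call (the other snippets here set it via os.environ). The metric names and
# timestamps are illustrative.
import json

# Instant query: the client returns the raw JSON body as a string.
up_now = json.loads(prometheus.query(metric='up'))

# Range query: note the library method is spelled `query_rang`.
up_range = json.loads(prometheus.query_rang(metric='up',
                                            start=1570221677,
                                            end=1570225677,
                                            step='1m'))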
def send_query(query, start, end, step, url):
    prometheus = Prometheus()
    prometheus.url = url
    res = prometheus.query_rang(metric=query, start=start, end=end, step=step)
    return res
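# Hypothetical call to the send_query helper above; the URL, time range and
# PromQL expression are placeholders. The result is the raw JSON string from
# Prometheus, so it still needs json.loads before use.
import json

raw = send_query(query='sum(rate(http_requests_total[5m]))',
                 start=1570221677,
                 end=1570225677,
                 step='30s',
                 url='http://localhost:9090')
series = json.loads(raw)['data']['result']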
class PrometheusBackend:
    """Backend for querying metrics from Prometheus."""

    def __init__(self, client=None, url=None, headers=None):
        self.client = client
        if not self.client:
            if url:
                os.environ['PROMETHEUS_URL'] = url
            if headers:
                os.environ['PROMETHEUS_HEAD'] = json.dumps(headers)
            self.client = Prometheus()

    def query_sli(self, timestamp, window, slo_config):
        """Query SLI value from a given PromQL expression.

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window (in seconds).
            slo_config (dict): SLO configuration.

        Returns:
            float: SLI value.
        """
        conf = slo_config['backend']
        measurement = conf['measurement']
        expr = measurement['expression']
        response = self.query(expr, window, timestamp, operators=[])
        sli_value = PrometheusBackend.count(response)
        LOGGER.debug(f"SLI value: {sli_value}")
        return sli_value

    def good_bad_ratio(self, timestamp, window, slo_config):
        """Compute good bad ratio from two metric filters.

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window (in seconds).
            slo_config (dict): SLO configuration.

        Note:
            At least one of `filter_bad` or `filter_valid` is required.

        Returns:
            tuple: A tuple of (good_count, bad_count).
        """
        conf = slo_config['backend']
        good = conf['measurement']['filter_good']
        bad = conf['measurement'].get('filter_bad')
        valid = conf['measurement'].get('filter_valid')
        operators = conf['measurement'].get('operators', ['increase', 'sum'])

        # Replace window by its value in the error budget policy step
        res = self.query(good, window, timestamp, operators)
        good_count = PrometheusBackend.count(res)

        if bad:
            res = self.query(bad, window, timestamp, operators)
            bad_count = PrometheusBackend.count(res)
        elif valid:
            res = self.query(valid, window, timestamp, operators)
            valid_count = PrometheusBackend.count(res)
            bad_count = valid_count - good_count
        else:
            raise Exception("`filter_bad` or `filter_valid` is required.")

        LOGGER.debug(f'Good events: {good_count} | '
                     f'Bad events: {bad_count}')

        return (good_count, bad_count)

    # pylint: disable=unused-argument
    def distribution_cut(self, timestamp, window, slo_config):
        """Query events for distributions (histograms).

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window (in seconds).
            slo_config (dict): SLO configuration.

        Returns:
            tuple: A tuple of (good_count, bad_count).
        """
        conf = slo_config['backend']
        measurement = conf['measurement']
        expr = measurement['expression']
        threshold_bucket = measurement['threshold_bucket']
        labels = {"le": threshold_bucket}
        res_good = self.query(expr,
                              window,
                              operators=['increase', 'sum'],
                              labels=labels)
        good_count = PrometheusBackend.count(res_good)

        # We use the _count metric to figure out the 'valid count'.
        # Trying to get the valid count from the _bucket metric query is hard
        # due to Prometheus 'le' syntax that doesn't have the alternative 'ge'
        # See https://github.com/prometheus/prometheus/issues/2018.
        expr_count = expr.replace('_bucket', '_count')
        res_valid = self.query(expr_count, window, operators=['increase', 'sum'])
        valid_count = PrometheusBackend.count(res_valid)
        bad_count = valid_count - good_count

        LOGGER.debug(f'Good events: {good_count} | '
                     f'Bad events: {bad_count}')

        return (good_count, bad_count)

    # pylint: disable=unused-argument
    def query(self, filter, window, timestamp=None, operators=[], labels={}):
        """Query Prometheus server.

        Args:
            filter (str): Query filter.
            window (int): Window (in seconds).
            timestamp (int): UNIX timestamp.
            operators (list): List of PromQL operators to apply on query.
            labels (dict): Labels dict to add to existing query.

        Returns:
            dict: Response.
        """
        filter = PrometheusBackend._fmt_query(filter, window, operators, labels)
        LOGGER.debug(f'Query: {filter}')
        response = self.client.query(metric=filter)
        response = json.loads(response)
        LOGGER.debug(pprint.pformat(response))
        return response

    @staticmethod
    def count(response):
        """Count events in Prometheus response.

        Args:
            response (dict): Prometheus query response.

        Returns:
            int: Event count.
        """
        # Note: this function could be replaced by using the `count_over_time`
        # function that Prometheus provides.
        try:
            return float(response['data']['result'][0]['value'][1])
        except (IndexError, KeyError) as exception:
            LOGGER.warning("Couldn't find any values in timeseries response.")
            LOGGER.debug(exception, exc_info=True)
            return NO_DATA  # no events in timeseries

    @staticmethod
    def _fmt_query(query, window, operators=[], labels={}):
        """Format Prometheus query:

        * If the PromQL expression has a `window` placeholder, replace it by
          the current window. Otherwise, append it to the expression.
        * If operators are defined, apply them to the metric in sequential
          order.
        * If labels are defined, append them to existing labels.

        Args:
            query (str): Original query in YAML config.
            window (int): Query window (in seconds).
            operators (list): Operators to wrap query with.
            labels (dict): Labels dict to add to existing query.

        Returns:
            str: Formatted query.
        """
        query = query.strip()
        if '[window' in query:
            query = query.replace('[window', f'[{window}s')
        else:
            query += f'[{window}s]'
        for operator in operators:
            query = f'{operator}({query})'
        for key, value in labels.items():
            query = query.replace('}', f', {key}="{value}"}}')
        return query
if 3 > len(argv):
    print('Command line error: Prometheus URL and push gateway are required.')
    print('Usage:')
    print(' %s <Prometheus URL> <push gateway host:port> [<past-days>]' % (argv[0], ))
    exit(1)

prometheus_url = argv[1]
pgw_url = argv[2]
past_days = 7
if 4 == len(argv):
    past_days = int(argv[3])

environ['PROMETHEUS_URL'] = prometheus_url
p = Prometheus()

for mesh in ["bare-metal", "svcmesh-linkerd", "svcmesh-istio"]:
    r = CollectorRegistry()
    workaround = mesh
    g, percs, runs = create_summary_gauge(p, mesh, r, past_days=past_days)
    dg, dpercs, druns = create_summary_gauge(p, mesh, r, detailed=True, past_days=past_days)
    print("%s: %d runs with %d percentiles (coarse)" % (mesh, runs, percs))
    print("%s: %d runs with %d percentiles (detailed)" % (mesh, druns, dpercs))
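# The excerpt above builds one CollectorRegistry per mesh but stops before
# using pgw_url. A push step at the end of each loop iteration would
# presumably look like the sketch below, using prometheus_client's standard
# push API; the job name is a placeholder.
from prometheus_client import push_to_gateway

push_to_gateway(pgw_url, job='mesh-latency-summary', registry=r)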
class PrometheusBackend(MetricBackend):
    """Backend for querying metrics from Prometheus."""

    def __init__(self, **kwargs):
        self.client = kwargs.pop('client', None)
        if not self.client:
            url = kwargs.get('url')
            headers = kwargs.get('headers')
            if url:
                os.environ['PROMETHEUS_URL'] = url
            if headers:
                os.environ['PROMETHEUS_HEAD'] = json.dumps(headers)
            LOGGER.debug(f'Prometheus URL: {url}')
            LOGGER.debug(f'Prometheus headers: {headers}')
            self.client = Prometheus()

    def query_sli(self, **kwargs):
        """Query SLI value from a given PromQL expression.

        Args:
            kwargs (dict):
                timestamp (int): Timestamp to query.
                window (int): Window to query (in seconds).
                measurement (dict):
                    expression (str): PromQL expression.

        Returns:
            float: SLI value.
        """
        window = kwargs['window']
        measurement = kwargs['measurement']
        expr = measurement['expression']
        expression = expr.replace("[window]", f"[{window}s]")
        data = self.query(expression)
        LOGGER.debug(
            f"Expression: {expression} | Result: {pprint.pformat(data)}")
        try:
            sli_value = float(data['data']['result'][0]['value'][1])
        except IndexError:
            sli_value = 0
        LOGGER.debug(f"SLI value: {sli_value}")
        return sli_value

    def good_bad_ratio(self, **kwargs):
        """Compute good bad ratio from two metric filters.

        Args:
            kwargs (dict):
                window (str): Query window.
                measurement (dict): Measurement config
                    filter_good (str): PromQL query for good events.
                    filter_bad (str, optional): PromQL query for bad events.
                    filter_valid (str, optional): PromQL query for valid events.

        Note:
            At least one of `filter_bad` or `filter_valid` is required.

        Returns:
            tuple: A tuple of (good_event_count, bad_event_count).
        """
        window = kwargs['window']
        filter_good = kwargs['measurement']['filter_good']
        filter_bad = kwargs['measurement'].get('filter_bad')
        filter_valid = kwargs['measurement'].get('filter_valid')

        # Replace window by its value in the error budget policy step
        expr_good = filter_good.replace('[window]', f'[{window}s]')
        res_good = self.query(expr_good)
        good_event_count = PrometheusBackend.count(res_good)

        if filter_bad:
            expr_bad = filter_bad.replace('[window]', f'[{window}s]')
            res_bad = self.query(expr_bad)
            bad_event_count = PrometheusBackend.count(res_bad)
        elif filter_valid:
            expr_valid = filter_valid.replace('[window]', f'[{window}s]')
            res_valid = self.query(expr_valid)
            bad_event_count = \
                PrometheusBackend.count(res_valid) - good_event_count
        else:
            raise Exception("`filter_bad` or `filter_valid` is required.")

        LOGGER.debug(f'Good events: {good_event_count} | '
                     f'Bad events: {bad_event_count}')

        return (good_event_count, bad_event_count)

    def query(self, filter):
        """Query Prometheus server.

        Args:
            filter (str): Query filter.

        Returns:
            dict: Response.
        """
        response = self.client.query(metric=filter)
        response = json.loads(response)
        LOGGER.debug(pprint.pformat(response))
        return response

    @staticmethod
    def count(response):
        """Count events in Prometheus response.

        Args:
            response (dict): Prometheus query response.

        Returns:
            int: Event count.
        """
        # Note: this function could be replaced by using the `count_over_time`
        # function that Prometheus provides.
        try:
            return len(response['data']['result'][0]['values'])
        except (IndexError, KeyError) as exception:
            LOGGER.warning("Couldn't find any values in timeseries response")
            LOGGER.debug(exception)
            return 0  # no events in timeseries
from prometheus_http_client import Prometheus
import prometheus_http_client
from prometheus_http_client.prometheus import *

import os

os.environ['PROMETHEUS_URL'] = 'http://demo.robustperception.io:9090/'


@prom
def up(*args, **kwargs):
    pass


@relabel('100 - (avg by (instance, job) (irate(node_cpu{mode="idle"}[5m])) * 100)')
def hello(*args, **kwargs):
    pass


if __name__ == '__main__':
    print("Up metrics ::", up())
    print("Node CPU Metrics :", hello())
    print("UP With Range ::")
    p = Prometheus()
    print(p.query_rang(metric="up", start=1570221677, end=1570225677))
    print(p.label_values('job'))
    print(dir(Prometheus))
class PrometheusBackend:
    """Backend for querying metrics from Prometheus."""

    def __init__(self, client=None, url=None, headers=None):
        self.client = client
        if not self.client:
            if url:
                os.environ['PROMETHEUS_URL'] = url
            if headers:
                os.environ['PROMETHEUS_HEAD'] = json.dumps(headers)
            LOGGER.debug(f'Prometheus URL: {url}')
            LOGGER.debug(f'Prometheus headers: {headers}')
            self.client = Prometheus()

    def query_sli(self, timestamp, window, slo_config):
        """Query SLI value from a given PromQL expression.

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window (in seconds).
            slo_config (dict): SLO configuration.

        Returns:
            float: SLI value.
        """
        conf = slo_config['backend']
        measurement = conf['measurement']
        expr = measurement['expression']
        expression = expr.replace("[window", f"[{window}s")
        data = self.query(expression, timestamp)
        LOGGER.debug(
            f"Expression: {expression} | Result: {pprint.pformat(data)}")
        try:
            sli_value = float(data['data']['result'][0]['value'][1])
        except IndexError:
            sli_value = 0
        LOGGER.debug(f"SLI value: {sli_value}")
        return sli_value

    def good_bad_ratio(self, timestamp, window, slo_config):
        """Compute good bad ratio from two metric filters.

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window (in seconds).
            slo_config (dict): SLO configuration.

        Note:
            At least one of `filter_bad` or `filter_valid` is required.

        Returns:
            tuple: A tuple of (good_count, bad_count).
        """
        conf = slo_config['backend']
        filter_good = conf['measurement']['filter_good']
        filter_bad = conf['measurement'].get('filter_bad')
        filter_valid = conf['measurement'].get('filter_valid')

        # Replace window by its value in the error budget policy step
        expr_good = filter_good.replace('[window', f'[{window}s')
        res_good = self.query(expr_good)
        good_count = PrometheusBackend.count(res_good)

        if filter_bad:
            expr_bad = filter_bad.replace('[window', f'[{window}s')
            res_bad = self.query(expr_bad, timestamp)
            bad_count = PrometheusBackend.count(res_bad)
        elif filter_valid:
            expr_valid = filter_valid.replace('[window', f'[{window}s')
            res_valid = self.query(expr_valid, timestamp)
            bad_count = PrometheusBackend.count(res_valid) - good_count
        else:
            raise Exception("`filter_bad` or `filter_valid` is required.")

        LOGGER.debug(f'Good events: {good_count} | '
                     f'Bad events: {bad_count}')

        return (good_count, bad_count)

    def query(self, filter, timestamp=None):  # pylint: disable=unused-argument
        """Query Prometheus server.

        Args:
            filter (str): Query filter.
            timestamp (int): UNIX timestamp.

        Returns:
            dict: Response.
        """
        response = self.client.query(metric=filter)
        response = json.loads(response)
        LOGGER.debug(pprint.pformat(response))
        return response

    @staticmethod
    def count(response):
        """Count events in Prometheus response.

        Args:
            response (dict): Prometheus query response.

        Returns:
            int: Event count.
        """
        # Note: this function could be replaced by using the `count_over_time`
        # function that Prometheus provides.
        try:
            return len(response['data']['result'][0]['values'])
        except (IndexError, KeyError) as exception:
            LOGGER.warning("Couldn't find any values in timeseries response")
            LOGGER.debug(exception, exc_info=True)
            return 0  # no events in timeseries
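# A hedged sketch of exercising query_sli from the backend above without a
# live server, by injecting a stub through its `client` argument. The stub,
# the SLO config and the metric names are made up for illustration, and the
# module-level LOGGER is assumed to be configured as in the class definition.
import json


class StubPrometheusClient:
    """Returns a canned instant-query response like the real client would."""

    def query(self, metric):
        return json.dumps({
            'status': 'success',
            'data': {'resultType': 'vector',
                     'result': [{'metric': {}, 'value': [1570225677, '0.999']}]},
        })


backend = PrometheusBackend(client=StubPrometheusClient())
slo_config = {
    'backend': {
        'measurement': {
            'expression': 'sum(rate(good_total[window])) / sum(rate(valid_total[window]))'
        }
    }
}
sli = backend.query_sli(timestamp=1570225677, window=3600, slo_config=slo_config)
# sli == 0.999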
def __init__(self, **kwargs):
    self.client = kwargs.get('client')
    if not self.client:
        self.client = Prometheus(**kwargs)
def promQueries(startTime, stopTime, testDirPath):
    prom = Prometheus()
    namespace = "robot-shop"
    step = '5s'

    # If you're using Kubernetes 1.16 and above you'll have to use pod instead
    # of pod_name and container instead of container_name.
    # Can use queries below to find rate of change also
    cpu5s = json.loads(
        prom.query_rang(
            metric='sum(container_cpu_usage_seconds_total{namespace="' + namespace + '"}) by (pod)',
            start=startTime,
            end=stopTime,
            step=step))
    memWriteB5s = json.loads(
        prom.query_rang(
            metric='sum(container_fs_writes_bytes_total{namespace="' + namespace + '"}) by (pod)',
            start=startTime,
            end=stopTime,
            step=step))
    memReadB5s = json.loads(
        prom.query_rang(
            metric='sum(container_fs_reads_bytes_total{namespace="' + namespace + '"}) by (pod)',
            start=startTime,
            end=stopTime,
            step=step))
    netReadB5s = json.loads(
        prom.query_rang(
            metric='sum(container_network_receive_bytes_total{namespace="' + namespace + '"}) by (pod)',
            start=startTime,
            end=stopTime,
            step=step))
    netWriteB5s = json.loads(
        prom.query_rang(
            metric='sum(container_network_transmit_bytes_total{namespace="' + namespace + '"}) by (pod)',
            start=startTime,
            end=stopTime,
            step=step))

    # cpu5s = json.loads(prom.query_rang(metric='sum(irate(container_cpu_usage_seconds_total{namespace="'+namespace+'"}[1m])) by (pod)', start=startTime, end=stopTime, step=step))
    # memWriteB5s = json.loads(prom.query_rang(metric='sum(rate(container_fs_writes_bytes_total{namespace="'+namespace+'"}[1m])) by (pod)', start=startTime, end=stopTime, step=step))
    # memReadB5s = json.loads(prom.query_rang(metric='sum(rate(container_fs_reads_bytes_total{namespace="'+namespace+'"}[1m])) by (pod)', start=startTime, end=stopTime, step=step))
    # netReadB5s = json.loads(prom.query_rang(metric='sum(irate(container_network_receive_bytes_total{namespace="'+namespace+'"}[1m])) by (pod)', start=startTime, end=stopTime, step=step))
    # netWriteB5s = json.loads(prom.query_rang(metric='sum(irate(container_network_transmit_bytes_total{namespace="'+namespace+'"}[1m])) by (pod)', start=startTime, end=stopTime, step=step))

    podMetricsDict = {}   # Dict of podDataCollection objects keyed by pod name
    timestampList = []    # List of scraped timestamps
    podNameList = []      # List of scraped pods
    tmp_pod = []

    print("cpu5s, ", cpu5s)

    # Create list of podDataCollection objects, with CPU vals:
    for pod in cpu5s['data']['result']:
        p = podDataCollection(pod['metric']['pod'])
        podNameList.append(pod['metric']['pod'])
        p.cpu5s = pod['values']
        podMetricsDict[p.podName] = p
        if not tmp_pod:
            tmp_pod = pod['values']

    # print("tmp_pod", tmp_pod)
    for tStamp, val in tmp_pod:
        timestampList.append(tStamp)

    for pod in memWriteB5s['data']['result']:
        podMetricsDict[pod['metric']['pod']].memW5s = pod['values']
    for pod in memReadB5s['data']['result']:
        podMetricsDict[pod['metric']['pod']].memR5s = pod['values']
    for pod in netWriteB5s['data']['result']:
        podMetricsDict[pod['metric']['pod']].netW5s = pod['values']
    for pod in netReadB5s['data']['result']:
        podMetricsDict[pod['metric']['pod']].netR5s = pod['values']

    # print(podMetricsDict)
    createRawCSVs(timestampList, podNameList, testDirPath, podMetricsDict)