def _get_validation_names(self, clusterinfo, filter=None):
    """Return the metric names exposed by a cluster's Prometheus.

    :param clusterinfo: cluster record accepted by _get_prometheus_info
    :param filter: optional prefix; only names starting with it are kept
    :return: {'cluster': <name>, 'data': [metric names]} or None on error
    """
    cluster, promurl, ssl_verify, promtoken = self._get_prometheus_info(
        clusterinfo)
    LOG.debug(
        "%s retrieving validation names for %s, filter %s",
        self.logmarker,
        cluster,
        filter,
    )
    try:
        names_response = self._promget(
            url=promurl,
            params={},
            token=promtoken,
            ssl_verify=ssl_verify,
            uri="/api/v1/label/__name__/values")
    except requests.exceptions.RequestException as details:
        LOG.error(
            "%s error accessing prometheus (%s): %s",
            self.logmarker,
            cluster,
            details,
        )
        return None
    names = names_response["data"]
    if filter:
        # keep only metric names matching the requested prefix
        names = [n for n in names if n.startswith(filter)]
    return {"cluster": cluster, "data": names}
def run(self, cname=None):
    """Sync deployment-validation metrics to dashdotDB.

    :param cname: optional cluster name; None processes every cluster

    Fixes over the previous version: the cluster list is fetched once
    (it was queried twice, doubling the work and breaking if the helper
    ever returned a one-shot iterator), and the dashdotDB token is now
    released even when a sync step raises.
    """
    # Materialize so the same sequence can be iterated twice below.
    clusters = list(self._get_clusters(cname))
    validation_list = threaded.run(func=self._get_validation_names,
                                   iterable=clusters,
                                   thread_pool_size=self.thread_pool_size,
                                   filter='deployment_validation_operator')
    validation_names = {}
    if validation_list:
        validation_names = {v['cluster']: v['data']
                            for v in validation_list if v}
    self._get_token()
    try:
        for cluster in clusters:
            cluster_name = cluster['name']
            if cluster_name not in validation_names:
                LOG.debug('%s Skipping cluster: %s',
                          self.logmarker, cluster_name)
                continue
            LOG.debug('%s Processing cluster: %s',
                      self.logmarker, cluster_name)
            validations = threaded.run(
                func=self._get_deploymentvalidation,
                iterable=validation_names[cluster_name],
                thread_pool_size=self.thread_pool_size,
                clusterinfo=cluster)
            threaded.run(func=self._post, iterable=validations,
                         thread_pool_size=self.thread_pool_size)
    finally:
        # always release the token, even on failure mid-loop
        self._close_token()
def _get_service_slo(self, slo_document):
    """Evaluate every SLO in *slo_document* against each namespace's
    Prometheus and return the collected measurements.

    :param slo_document: dict with 'name', 'namespaces' and 'slos' keys
    :return: list of per-SLO result dicts ready for posting to Dash.DB
    """
    LOG.debug("SLO: processing %s", slo_document["name"])
    collected = []
    for ns in slo_document["namespaces"]:
        cluster_info = ns["cluster"]
        if not cluster_info.get("prometheusUrl"):
            # no Prometheus endpoint recorded for this cluster; skip it
            continue
        promurl = cluster_info["prometheusUrl"]
        ssl_verify = not cluster_info["spec"]["private"]
        promtoken = self._get_automationtoken(cluster_info["automationToken"])
        for slo in slo_document["slos"]:
            promquery = jinja2.Template(slo["expr"]).render(
                {"window": slo["SLOParameters"]["window"]})
            prom_response = self._promget(
                url=promurl,
                params={"query": promquery},
                token=promtoken,
                ssl_verify=ssl_verify,
            )
            series = prom_response["data"]["result"]
            if not series:
                continue
            sample = series[0]["value"]
            if not sample:
                continue
            slo_value = float(sample[1])
            slo_target = float(slo["SLOTarget"])
            # In Dash.DB we want to always store SLOs in percentages
            if slo["SLOTargetUnit"] == "percent_0_1":
                slo_value *= 100
                slo_target *= 100
            collected.append({
                "name": slo["name"],
                "SLIType": slo["SLIType"],
                "namespace": ns,
                "cluster": ns["cluster"],
                "service": ns["app"],
                "value": slo_value,
                "target": slo_target,
                "SLODoc": {"name": slo_document["name"]},
            })
    return collected
def _get_service_slo(self, slo_document):
    """Collect SLO measurements for every namespace in an SLO document.

    :param slo_document: dict with 'name', 'namespaces' and 'slos' keys
    :return: list of measurement dicts (one per SLO with a prom result)
    """
    LOG.debug('SLO: processing %s', slo_document['name'])
    measurements = []
    for ns in slo_document['namespaces']:
        if not ns['cluster'].get('prometheusUrl'):
            # cluster has no Prometheus endpoint on record
            continue
        promurl = ns['cluster']['prometheusUrl']
        ssl_verify = not ns['cluster']['spec']['private']
        promtoken = self._get_automationtoken(
            ns['cluster']['automationToken'])
        for slo in slo_document['slos']:
            rendered_query = jinja2.Template(slo['expr']).render(
                {"window": slo['SLOParameters']['window']})
            response = self._promget(
                url=promurl,
                params={'query': rendered_query},
                token=promtoken,
                ssl_verify=ssl_verify)
            result = response['data']['result']
            if not result:
                continue
            value_pair = result[0]['value']
            if not value_pair:
                continue
            value = float(value_pair[1])
            target = float(slo['SLOTarget'])
            # In Dash.DB we want to always store SLOs in percentages
            if slo['SLOTargetUnit'] == "percent_0_1":
                value *= 100
                target *= 100
            measurements.append({
                "name": slo['name'],
                "SLIType": slo['SLIType'],
                "namespace": ns,
                "cluster": ns['cluster'],
                "service": ns['app'],
                "value": value,
                "target": target,
                "SLODoc": {"name": slo_document['name']},
            })
    return measurements
def _post(self, service_slo): if service_slo is None: return None for item in service_slo: LOG.debug(f'About to POST SLO JSON item to dashdotDB:\n{item}\n') response = None if self.dry_run: return response for item in service_slo: slo_name = item['name'] LOG.info('%s syncing slo %s', self.logmarker, slo_name) endpoint = (f'{self.dashdotdb_url}/api/v1/' f'serviceslometrics/{slo_name}') response = self._do_post(endpoint, item) try: response.raise_for_status() except requests.exceptions.HTTPError as details: LOG.error('%s error posting %s - %s', self.logmarker, slo_name, details) LOG.info('%s slo %s synced', self.logmarker, slo_name) return response
def _get_deploymentvalidation(self, validation, clusterinfo):
    """Run one validation query against a cluster's Prometheus.

    :param validation: metric name to query
    :param clusterinfo: cluster record accepted by _get_prometheus_info
    :return: {'cluster': <name>, 'data': <raw prom response>} or None
    """
    cluster, promurl, ssl_verify, promtoken = self._get_prometheus_info(
        clusterinfo)
    LOG.debug('%s processing %s, %s', self.logmarker, cluster, validation)
    try:
        payload = self._promget(url=promurl,
                                params={'query': validation},
                                token=promtoken,
                                ssl_verify=ssl_verify)
    except requests.exceptions.RequestException as details:
        LOG.error('%s error accessing prometheus (%s): %s',
                  self.logmarker, cluster, details)
        return None
    return {'cluster': cluster, 'data': payload}
def _post(self, deploymentvalidation): if deploymentvalidation is None: return cluster = deploymentvalidation["cluster"] # dvd.data.data.result.[{metric,values}] dvresult = deploymentvalidation.get("data").get("data").get("result") if dvresult is None: return LOG.info("%s Processing (%s) metrics for: %s", self.logmarker, len(dvresult), cluster) if not self.chunksize: self.chunksize = len(dvresult) if len(dvresult) <= int(self.chunksize): metrics = dvresult else: metrics = list(self._chunkify(dvresult, self.chunksize)) LOG.info( "%s Chunked metrics into (%s) elements for: %s", self.logmarker, len(metrics), cluster, ) # keep everything but metrics from prom blob deploymentvalidation["data"]["data"]["result"] = [] response = None for metric_chunk in metrics: # to keep future-prom-format compatible, # keeping entire prom blob but iterating on metrics by # self.chunksize max metrics in one post dvdata = deploymentvalidation["data"] # if metric_chunk isn't already a list, make it one if isinstance(metric_chunk, list): dvdata["data"]["result"] = metric_chunk else: dvdata["data"]["result"] = [metric_chunk] if not self.dry_run: endpoint = (f"{self.dashdotdb_url}/api/v1/" f"deploymentvalidation/{cluster}") response = self._do_post(endpoint, dvdata, (5, 120)) try: response.raise_for_status() except requests.exceptions.RequestException as details: LOG.error( "%s error posting DVO data (%s): %s", self.logmarker, cluster, details, ) LOG.info("%s DVO data for %s synced to DDDB", self.logmarker, cluster) return response
def _post(self, manifest): if manifest is None: return None cluster = manifest["cluster"] imagemanifestvuln = manifest["data"] response = None LOG.info("%s syncing cluster %s", self.logmarker, cluster) if self.dry_run: return response for item in imagemanifestvuln["items"]: endpoint = f"{self.dashdotdb_url}/api/v1/" f"imagemanifestvuln/{cluster}" response = self._do_post(endpoint, item) try: response.raise_for_status() except requests.exceptions.HTTPError as details: LOG.error("%s error posting %s - %s", self.logmarker, cluster, details) LOG.info("%s cluster %s synced", self.logmarker, cluster) return response
def _post(self, manifest): if manifest is None: return None cluster = manifest['cluster'] imagemanifestvuln = manifest['data'] response = None LOG.info('%s syncing cluster %s', self.logmarker, cluster) if self.dry_run: return response for item in imagemanifestvuln['items']: endpoint = (f'{self.dashdotdb_url}/api/v1/' f'imagemanifestvuln/{cluster}') response = self._do_post(endpoint, item) try: response.raise_for_status() except requests.exceptions.HTTPError as details: LOG.error('%s error posting %s - %s', self.logmarker, cluster, details) LOG.info('%s cluster %s synced', self.logmarker, cluster) return response
def _get_imagemanifestvuln(cluster, oc_map):
    """Fetch all ImageManifestVuln resources from one cluster.

    :param cluster: cluster name used to look up an oc client in oc_map
    :param oc_map: mapping of cluster name -> oc client
    :return: {'cluster': <name>, 'data': <resources>} or None when the
        client is unavailable, the CRD is absent, or nothing was found
    """
    LOG.info("%s processing %s", LOGMARKER, cluster)
    oc = oc_map.get(cluster)
    if not oc:
        # NOTE(review): a falsy `oc` is still expected to carry
        # log_level/message attributes (message-object pattern) — verify
        LOG.log(level=oc.log_level, msg=oc.message)
        return None
    try:
        vulns = oc.get_all("ImageManifestVuln", all_namespaces=True)
    except StatusCodeError:
        # CRD not present on this cluster: operator is not installed
        LOG.info("%s not installed on %s", LOGMARKER, cluster)
        return None
    if not vulns:
        return None
    return {"cluster": cluster, "data": vulns}