class TestSummary(unittest.TestCase):
    def setUp(self):
        self.registry = CollectorRegistry()
        self.summary = Summary('s', 'help', registry=self.registry)

    def test_summary(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))
        self.assertEqual(0, self.registry.get_sample_value('s_sum'))
        self.summary.observe(10)
        self.assertEqual(1, self.registry.get_sample_value('s_count'))
        self.assertEqual(10, self.registry.get_sample_value('s_sum'))

    def test_function_decorator(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))

        @self.summary.time()
        def f():
            pass

        f()
        self.assertEqual(1, self.registry.get_sample_value('s_count'))

    def test_block_decorator(self):
        self.assertEqual(0, self.registry.get_sample_value('s_count'))
        with self.summary.time():
            pass
        self.assertEqual(1, self.registry.get_sample_value('s_count'))
def emit_summary_metric(registry: CollectorRegistry, metric_name: str,
                        metric_description: str, seconds: float):
    """
    Emits a Summary metric, which tracks how many times a function is called
    over a period of time and how long those calls take.
    """
    try:
        # Add suffix to the metric name and prefix to the metric description
        metric_name = metric_name + "Summary"
        metric_description = "Summary metric description: " + metric_description

        summary = Summary(metric_name, metric_description, registry=registry)
        summary.observe(seconds)

        @summary.time()
        def dummy_function_with_sleep(seconds):
            """A dummy function"""
            time.sleep(seconds)

        dummy_function_with_sleep(0.1)
        dummy_function_with_sleep(0.2)
        dummy_function_with_sleep(0.3)
        dummy_function_with_sleep(0.2)
        dummy_function_with_sleep(0.1)

        ok_message("Summary metric '{}' was created".format(metric_name))
    except Exception as error:
        error_message("Error while emitting Summary metric: {}".format(error))
def print_hello():
    registry = CollectorRegistry()
    c = Counter('count_exceptions', 'counts number of successes and failures',
                labelnames=['type'], registry=registry)
    s = Summary('time_delta', 'execution time of print_hello function',
                registry=registry)
    for i in range(randint(1, 10)):
        start = timer()
        time.sleep(random() * 10)
        try:
            if randint(0, 1) == 1:
                raise Exception
            c.labels(type='success').inc()
        except:
            c.labels(type='failure').inc()
        end = timer()
        s.observe(timedelta(seconds=end - start).seconds)
    push_to_gateway('%s:9091' % GATEWAY, job='print_hello', registry=registry)
    return 'Hello world!'
class Metric:
    """
    Metric class abstracts away the complexity of dealing with Prometheus data types.
    """

    def __init__(self, name, metric_type, description, buckets):
        self.name = name
        self.type = metric_type
        if metric_type == 'Counter':
            self._metric = Counter(name, description)
        elif metric_type == 'Gauge':
            self._metric = Gauge(name, description)
        elif metric_type == 'Histogram':
            self._metric = Histogram(name, description, buckets=buckets)
        elif metric_type == 'Summary':
            self._metric = Summary(name, description)

    def report(self, value):
        value = float(value)
        if self.type == 'Counter':
            self._metric.inc(value)
        elif self.type == 'Gauge':
            self._metric.set(value)
        elif self.type in ('Histogram', 'Summary'):
            self._metric.observe(value)
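A minimal usage sketch for the wrapper above; the metric names, values, and port here are illustrative (not from the original code), and it assumes the metrics go to prometheus_client's default registry exposed via start_http_server:

# Hypothetical usage of the Metric wrapper defined above.
from prometheus_client import start_http_server
import random
import time

# buckets is only used for the 'Histogram' type, so None is fine elsewhere
latency = Metric('request_latency_seconds', 'Summary', 'Request latency', None)
errors = Metric('request_errors_total', 'Counter', 'Request errors', None)

start_http_server(8000)  # expose the default registry on :8000/metrics
while True:
    latency.report(random.random())  # Summary -> observe()
    errors.report(1)                 # Counter -> inc()
    time.sleep(1)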
def summary_example():
    registry = CollectorRegistry()
    s = Summary('test_summary', 'Description of summary', registry=registry)
    s.observe(random.randint(0, 20))
    push_to_gateway('192.168.10.3:9091', job=Summary.__name__.lower(),
                    registry=registry)
class EventCollector(object):
    def __init__(self, url, username, password):
        self.zapi = ZabbixAPI(url)
        self.zapi.login(username, password)
        self.last_collection = int(time.time())
        # Create metric objs, labelled by host and template so observations can be attributed
        self.event_duration = Summary(
            'zabbix_event_duration',
            'Summary of Zabbix event resolution duration',
            labelnames=['host', 'templateid'])

    def collect(self):
        # Get a list of all events with value=OK
        ok_events = self.zapi.event.get(
            value=0,
            time_from=self.last_collection,
            output='extend',
            selectHosts=['host'],
            sortfield='clock',
        )
        self.last_collection = int(time.time())

        # Determine the corresponding problem events
        p_ids = []
        events = defaultdict(dict)  # defaultdict needs a callable factory, not a dict literal
        for ok in ok_events:
            p_ids.append(ok['c_eventid'])
            events[ok['eventid']]['end'] = ok['clock']
            events[ok['eventid']]['hosts'] = ok['hosts'][0]['host']
            events[ok['eventid']]['template'] = ok['objectid']

        problem_events = self.zapi.event.get(
            eventids=p_ids,
            output='extend',
        )
        for prob in problem_events:
            events[prob['r_eventid']]['start'] = prob['clock']

        for event in sorted(events.values(), key=lambda x: x['end']):
            # Zabbix returns clock values as strings, so convert before subtracting
            duration = int(event['end']) - int(event['start'])
            self.event_duration.labels(
                host=event['hosts'],
                templateid=event['template'],
            ).observe(duration)
        yield self.event_duration
def notify_success(self, source, hostname, filename, stats):
    registry = CollectorRegistry()

    s = Summary('backup_size', 'Size of backup file in bytes', registry=registry)
    s.observe(stats.size)
    s = Summary('backup_dumptime', 'Time taken to dump in seconds', registry=registry)
    s.observe(stats.dumptime)
    s = Summary('backup_uploadtime', 'Time taken to upload backup in seconds', registry=registry)
    s.observe(stats.uploadtime)
    g = Gauge('backup_timestamp', 'Time backup completed as seconds-since-the-epoch', registry=registry)
    g.set_to_current_time()

    def auth_handler(url, method, timeout, headers, data):
        return basic_auth_handler(url, method, timeout, headers, data,
                                  self.username, self.password)

    try:
        push_to_gateway(self.url, job=source.id, registry=registry, handler=auth_handler)
        logging.info("Pushed metrics for job '%s' to gateway (%s)" % (source.id, self.url))
    except Exception as e:
        logging.error(
            "Unable to push metrics for job '%s' to gateway (%s): %s" %
            (source.id, self.url, str(e)))
def prometheus_solid(context):
    s = Summary(
        'request_latency_seconds',
        'Description of summary',
        registry=context.resources.prometheus.registry,
    )
    s.observe(4.7)

    request_time = Summary(
        'response_latency_seconds',
        'Response latency (seconds)',
        registry=context.resources.prometheus.registry,
    )
    with request_time.time():
        time.sleep(1)

    recorded = context.resources.prometheus.registry.get_sample_value(
        'request_latency_seconds_sum')
    assert abs(4.7 - recorded) < EPS

    recorded = context.resources.prometheus.registry.get_sample_value(
        'response_latency_seconds_sum')
    assert abs(1.0 - recorded) < 1.0
def run_app():
    args = parser.parse_args()
    start_http_server(args.prometheus_port)
    summary = Summary(args.prometheus_metric, '')
    send_func = lambda value: summary.observe(value)

    rps = args.rps
    assert rps > 0, 'RPS should be > 0'

    # Mu and alpha have these formulas - more info in ipynb
    mu = 1000 / rps
    alpha = args.response_time_avg
    assert alpha > 0, 'response_time_avg should be > 0'

    while True:
        send_batch(alpha, mu, send_func)
def notify_success(self, source, hostname, filename, stats):
    registry = CollectorRegistry()

    s = Summary('backup_size', 'Size of backup file in bytes', registry=registry)
    s.observe(stats.size)
    s = Summary('backup_dumptime',
                'Time taken to dump and compress/encrypt backup in seconds',
                registry=registry)
    s.observe(stats.dumptime)
    s = Summary('backup_uploadtime', 'Time taken to upload backup in seconds', registry=registry)
    s.observe(stats.uploadtime)

    if stats.retained_copies is not None:
        g = Gauge('backup_retained_copies',
                  'Number of retained backups found on destination',
                  registry=registry)
        g.set(stats.retained_copies)

    g = Gauge('backup_timestamp', 'Time backup completed as seconds-since-the-epoch', registry=registry)
    g.set_to_current_time()

    def auth_handler(url, method, timeout, headers, data):
        return basic_auth_handler(url, method, timeout, headers, data,
                                  self.username, self.password)

    push_to_gateway(self.url, job=source.id, registry=registry, handler=auth_handler)
    logging.info("Pushed metrics for job '%s' to gateway (%s)" % (source.id, self.url))
class LookerMetricFetcher(object):
    def __init__(self, *, client_id, client_secret, looker_base_url, dashboard_id):
        self._query_url = "{}/api/3.0/queries/run/json".format(looker_base_url)
        self._dashboard_id = dashboard_id
        self._dashboard_render_time_summary = Summary(
            "looker_dashboard_{}_render_time_seconds".format(dashboard_id),
            "Time in seconds until last tile of dashboard {} finished rendering".format(dashboard_id))
        self._successful_queries_counter = Counter(
            "looker_number_of_successful_queries_counter",
            "Number of successful queries to Looker's API")
        self._query_response_time_summary = Summary(
            "looker_query_response_time_seconds",
            "Length of time Looker took to respond to the performance query")
        self._auth = LookerAuth(client_id=client_id,
                                client_secret=client_secret,
                                looker_base_url=looker_base_url)
        self._max_event_id = None

    def observe(self):
        while True:
            self._fetch_metrics()
            time.sleep(60)

    def _fetch_metrics(self):
        data = json.dumps({
            "model": "i__looker",
            "view": "dashboard_performance",
            "fields": [
                "dashboard_performance.seconds_until_last_tile_finished_rendering",
                "event.id"
            ],
            "filters": self._filters(),
        })
        headers = {"Authorization": "Bearer {}".format(self._auth.get_token())}
        response = requests.post(self._query_url, headers=headers, data=data)

        if response.status_code >= 500:
            raise LookerQueryError
        if response.status_code >= 400:
            if "authentication" in response.json()["message"]:
                raise LookerAuthenticationError
            else:
                # TODO: Add actual useful exception
                raise Exception

        response_time = response.elapsed.seconds + response.elapsed.microseconds / 1000000
        self._query_response_time_summary.observe(response_time)
        self._successful_queries_counter.inc()

        for result in response.json():
            event_id = int(result["event.id"])
            if self._max_event_id is None or event_id > self._max_event_id:
                self._max_event_id = event_id
            self._dashboard_render_time_summary.observe(
                result["dashboard_performance.seconds_until_last_tile_finished_rendering"])

    def _filters(self):
        if self._max_event_id is None:
            return {
                "history.real_dash_id": self._dashboard_id,
                "dashboard_performance.seconds_until_last_tile_finished_rendering": "NOT NULL",
                "dashboard_performance.last_event_at_date": "10 minutes",
            }
        return {
            "history.real_dash_id": self._dashboard_id,
            "dashboard_performance.seconds_until_last_tile_finished_rendering": "NOT NULL",
            "event.id": ">{}".format(self._max_event_id)
        }
        cgi_user=miner.get('user', 'root'),
        cgi_password=miner.get('password', 'root'))
except KeyError as e:
    if e.args[0] == 'ip':
        logger.error(f'IP for miner {miner} does not exist')
    elif e.args[0] == 'name':
        logger.warning(
            f'Name for miner {miner["ip"]} does not exist')
    else:
        logger.exception(e)

if m.is_alive():
    miner_list.append(m)
    if is_str_canbe_int(m.system_info.uptime):
        uptime = datetime.timedelta(
            minutes=int(m.system_info.uptime))
        antminer_uptime_seconds.observe(uptime.total_seconds())
    else:
        uptime_h, uptime_m = m.system_info.uptime.split(':')
        uptime = datetime.timedelta(hours=int(uptime_h),
                                    minutes=int(uptime_m))
        antminer_uptime_seconds.observe(uptime.total_seconds())
    # m.verify_kernel_log()
    logger.info(
        f'Connected! Now monitoring this AntMiner: {m.system_info.hostname or m.system_info.ipaddress}'
    )
else:
    logger.warning(
        f'Miner {m.system_info.ipaddress} not working on any pool!'
    )

seconds_per_miner = abs((SECONDS_4_CHECKS / len(miner_list)) - (4 * len(miner_list)))
        # modify this to log exactly what you need:
        if 'ready' not in request.url:
            logger.info('%s %s %s %s' % (request.remote_addr,
                                         request.method,
                                         request.url,
                                         response.status))
        return actual_response
    return _log_to_logger


app = Bottle()
app.install(log_to_logger)

# Create a metric to track time spent and requests made.
REQUEST_TIME = Summary('gitlabservice_request_processing_seconds',
                       'Time spent processing request')
REQUEST_TIME.observe(4.7)
# IN_PROGRESS = Gauge("gitlabservice_inprogress_requests", "help")
REQUESTS = Counter('gitlabservice_http_requests_total',
                   'Description of counter', ['method', 'endpoint'])
INFO = Info('gitlabservice_version', 'Description of info')
INFO.info({'version': '1.1', 'port': '3001'})


def enable_cors(fn):
    def _enable_cors(*args, **kwargs):
        # set CORS headers
        response.headers['Access-Control-Allow-Origin'] = '*'
        response.headers[
            'Access-Control-Allow-Methods'] = 'GET, POST, PUT, OPTIONS'
        response.headers[
            'Access-Control-Allow-Headers'] = 'Origin, Accept, Content-Type, X-Requested-With, X-CSRF-Token'
       default="",
       help="prediction model version",
       type=str)
define("PIO_MODEL_SERVER_PORT",
       default="9876",
       help="tornado http server listen port",
       type=int)
define("PIO_MODEL_SERVER_PROMETHEUS_PORT",
       default="8080",
       help="port to run the prometheus http metrics server on",
       type=int)

# Create a metric to track time spent and requests made.
REQUEST_TIME = Summary('request_processing_seconds',
                       'Model Server: Time spent processing request')
REQUEST_TIME.observe(1.0)  # Observe 1.0 (seconds in this case)

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
logger.addHandler(ch)


class Application(tornado.web.Application):
    def __init__(self):
        handlers = [
            # (r"/", IndexHandler),
            # TODO: Disable DEPLOY if we're not explicitly in PIO_MODEL_ENVIRONMENT=dev mode (or equivalent)
            # url: /api/v1/model/deploy/$PIO_MODEL_TYPE/$PIO_MODEL_NAMESPACE/$PIO_MODEL_NAME/$PIO_MODEL_VERSION/
            (r"/api/v1/model/deploy/([a-zA-Z\-0-9\.:,_]+)/([a-zA-Z\-0-9\.:,_]+)/([a-zA-Z\-0-9\.:,_]+)/([a-zA-Z\-0-9\.:,_]+)",
             ModelDeployPython3Handler),
class Source:
    @check_type
    def __init__(
        self,
        tap,
        tap_schema: dict = None,
        tap_name: str = None,
        tap_key: str = None,
        port: int = 8000,
    ):
        """
        Parameters
        ----------
        tap: str / object
            tap source.
        tap_schema: Dict, (default=None)
            data schema if tap an object. If `tap_schema` is None, it will auto generate schema.
        tap_name: str, (default=None)
            name for tap, necessary if tap is an object. it will throw an error if not a string if tap is an object.
        tap_key: str, (default=None)
            important non-duplicate key from `tap.emit()`, usually a timestamp.
        port: int, (default=8000)
            prometheus exporter port.
        """
        if not isinstance(tap, str) and not hasattr(tap, 'emit'):
            raise ValueError(
                'tap must a string or an object with method `emit`')

        if hasattr(tap, '__dict__'):
            self.tap = helper.Tap(
                tap,
                tap_schema=tap_schema,
                tap_name=tap_name,
                tap_key=tap_key,
            )
            f = tap_name
            self.tap_schema = tap_schema
        else:
            self.tap = tap
            self.tap_schema = None
            f = tap

        self._targets = []
        start_http_server(port)
        f = function.parse_name(f)
        self._tap_count = Counter(f'total_{f}', f'total rows {f}')
        self._tap_data = Summary(f'data_size_{f}', f'summary of data size {f} (KB)')
        self._tap_data_histogram = Histogram(
            f'data_size_histogram_{f}', f'histogram of data size {f} (KB)')

    def add(self, target):
        """
        Parameters
        ----------
        target: str / object
            target source.
        """
        if not isinstance(target, str) and not hasattr(target, 'parse'):
            raise ValueError(
                'target must a string or an object with method `parse`')
        if isinstance(target, str):
            if '.py' in target:
                target = f'python3 {target}'
        self._targets.append(target)

    def get_targets(self):
        """
        Returns
        ----------
        result: list of targets
        """
        return self._targets

    @check_type
    def delete_target(self, index: int):
        """
        Parameters
        ----------
        index: int
            target index from `get_targets()`.
        """
        self._targets.pop(index)

    @check_type
    def start(
        self,
        transformation: Callable = None,
        asynchronous: bool = False,
        debug: bool = True,
        ignore_null: bool = True,
        graceful_shutdown: int = 30,
    ):
        """
        Parameters
        ----------
        transformation: Callable, (default=None)
            a callable variable to transform tap data, this will auto generate new data schema.
        asynchronous: bool, (default=False)
            If True, emit to targets in async manner, else, loop from first target until last target.
        debug: bool, (default=True)
            If True, will print every rows emitted and parsed.
        ignore_null: bool, (default=True)
            If False, if one of schema value is Null, it will throw an exception.
        graceful_shutdown: int, (default=30)
            If bigger than 0, any error happened, will automatically shutdown after sleep.
        """
        if graceful_shutdown < 0:
            raise ValueError('`graceful_shutdown` must bigger than -1')
        if not len(self._targets):
            raise Exception(
                'targets are empty, please add a target using `source.add()` first.'
            )

        self._pipes = []
        for target in self._targets:
            if isinstance(target, str):
                p = Popen(target.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
                t = helper.Check_Error(p, graceful_shutdown)
                t.start()
            else:
                p = target
            self._pipes.append(helper.Target(p, target))

        if isinstance(self.tap, str):
            pse = Popen(self.tap.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
            t = helper.Check_Error(pse, graceful_shutdown)
            t.start()
            pse = iter(pse.stdout.readline, b'')
        else:
            pse = self.tap
            self.tap.tap.count = 0

        if transformation:
            from genson import SchemaBuilder

            builder = SchemaBuilder()
            builder.add_schema({'type': 'object', 'properties': {}})
        else:
            builder = None

        try:
            for lines in pse:
                if lines is None:
                    break
                if isinstance(lines, bytes):
                    lines = [lines]
                if transformation:
                    lines = helper.transformation(
                        lines,
                        builder,
                        transformation,
                        tap_schema=self.tap_schema,
                    )
                for line in lines:
                    line = line.decode().strip()
                    if len(line):
                        if debug:
                            logger.info(line)
                        if '"type": "SCHEMA"' in line and not ignore_null:
                            l = json.loads(line)
                            for k, v in l['schema']['properties'].items():
                                if v['type'].lower() == 'null':
                                    raise ValueError(
                                        f'{k} is a NULL, some of database cannot accept NULL schema. To ignore this exception, simply set `ignore_null` = True.'
                                    )
                        self._tap_count.inc()
                        self._tap_data.observe(sys.getsizeof(line) / 1000)
                        self._tap_data_histogram.observe(
                            sys.getsizeof(line) / 1000)

                        if asynchronous:

                            @gen.coroutine
                            def loop():
                                r = yield [
                                    _sinking(line, pipe)
                                    for pipe in self._pipes
                                ]

                            result = loop()
                            if debug:
                                logger.info(result.result())
                        else:
                            for pipe in self._pipes:
                                result = _sinking(line, pipe)
                                if debug:
                                    logger.info(result.result())

                        if '"type": "RECORD"' in line and not isinstance(
                                self.tap, str):
                            self.tap.tap.count += 1

            for pipe in self._pipes:
                if isinstance(pipe.target, Popen):
                    try:
                        pipe.target.communicate()
                    except:
                        pass

        except Exception as e:
            if graceful_shutdown > 0:
                logger.error(e)
                time.sleep(graceful_shutdown)
                os._exit(1)
            else:
                raise Exception(e)
        msg = f"Failed to parse argument {param}"
        app.logger.error(msg)
        app.logger.debug(ex)
        resp = flask.Response(messages.error_message(), mimetype="application/json")
        resp.headers["Friendbot-Error"] = "True"
        return resp
    sentence = utils.get_sentence(user, channel, cache)
    payload = messages.prompt_message(sentence, user, channel)
    resp = flask.Response(payload, mimetype="application/json")
    resp.headers["Friendbot-Error"] = "False"
    resp.headers["Friendbot-User"] = user
    resp.headers["Friendbot-Channel"] = channel
    req_time = time.time() - start_time
    req_time_ms = round(req_time * 1000, 3)
    length_summary.observe(req_time)
    msg = f"{real_name} ({user_id}) generated a sentence; C: {channel} U: {user} {req_time_ms}ms"
    app.logger.info(msg)
    return resp


@app.route("/health", methods=["GET"])
def health_endpoint():
    start_time = time.time()
    sentence = utils.get_sentence("None", "None", cache)
    resp = flask.Response(messages.health_message(sentence), mimetype="application/json")
    req_time = time.time() - start_time
    req_time_ms = round(req_time * 1000, 3)
    length_summary.observe(req_time)
    app.logger.debug(f"Health Check Successful {req_time_ms}ms")
def process_request(t):
    """A dummy function that takes some time."""
    time.sleep(t)


if __name__ == '__main__':
    # Start up the server to expose the metrics.
    start_http_server(8111)

    # examples for counter/gauge/summary/histogram
    c = Counter('myfake_failures_total', 'Description of counter')
    g = Gauge('myfake_inprogress_requests', 'Description of gauge')
    s = Summary('myfake_summary_request_latency_seconds', 'Description of summary')
    h = Histogram('myfake_histogram_request_latency_seconds', 'Description of histogram')

    while True:
        # counter example
        c.inc()  # Increment by 1
        # c.inc(random.random())  # Increment by given value

        # gauge example
        g.inc()  # Increment by 1
        # g.dec(10)   # Decrement by given value
        # g.set(4.2)  # Set to a given value

        # summary example
        s.observe(1.1)  # Observe 1.1 (seconds in this case)
        # Generate some requests.
        process_request(random.random())

        # histogram example
        h.observe(4.7)  # Observe 4.7 (seconds in this case)
def get_request_time(hostname):
    s = Summary('request_latency_seconds', 'Description of summary')
    # observe() returns None, so keep the observed value in a variable
    request_time = 4.7
    s.observe(request_time)
    print("summary of request latency: " + str(request_time))
    return request_time
from flask import Response, Flask, request
import prometheus_client
from prometheus_client import Summary, Counter, Histogram, Gauge

# Flask
app = Flask(__name__)

# Example values
example_summary = Summary('alicek106_summary', 'Summary example')
example_summary.observe(5.5)

example_histogram = Histogram('alicek106_histogram', 'Histogram example',
                              buckets=(1, 5, 10, 50, 100, 200, 500, 1000))
example_histogram.observe(1)
example_histogram.observe(5)
example_histogram.observe(10)
example_histogram.observe(100)

example_counter = Counter('alicek106_counter', 'Counter example')

rate_example_gauge = Gauge('alicek106_gauge', 'Gauge example for rate()')
rate_example_gauge.set(5)
initial_value = 5
offset_value = 5

## group_left example. It should be 'counter' type if you want to use.
methods = ["get", "get", "put", "post", "post"]
queries = [500, 404, 501, 500, 404]
values = [24, 30, 3, 6, 21]

error_gauge = Gauge('alicek106_http_errors', 'Test', ['method', 'code'])
for i in range(0, len(methods)):
class RequestHandler:
    """
    Class that handles the requests arriving to the gateway and the result extracted from the requests future.

    :param metrics_registry: optional metrics registry for prometheus used if we need to expose metrics from the executor or from the data request handler
    :param runtime_name: optional runtime_name that will be registered during monitoring
    """

    def __init__(
        self,
        metrics_registry: Optional['CollectorRegistry'] = None,
        runtime_name: Optional[str] = None,
    ):
        self.request_init_time = {} if metrics_registry else None
        self._executor_endpoint_mapping = None

        if metrics_registry:
            with ImportExtensions(
                required=True,
                help_text='You need to install the `prometheus_client` to use the monitoring functionality of jina',
            ):
                from prometheus_client import Summary

            self._summary = Summary(
                'receiving_request_seconds',
                'Time spent processing request',
                registry=metrics_registry,
                namespace='jina',
                labelnames=('runtime_name',),
            ).labels(runtime_name)
        else:
            self._summary = None

    def handle_request(
        self, graph: 'TopologyGraph', connection_pool: 'GrpcConnectionPool'
    ) -> Callable[['Request'], 'asyncio.Future']:
        """
        Function that handles the requests arriving to the gateway. This will be passed to the streamer.

        :param graph: The TopologyGraph of the Flow.
        :param connection_pool: The connection pool to be used to send messages to specific nodes of the graph
        :return: Return a Function that given a Request will return a Future from where to extract the response
        """

        async def gather_endpoints(request_graph):
            def _get_all_nodes(node, accum, accum_names):
                if node.name not in accum_names:
                    accum.append(node)
                    accum_names.append(node.name)
                for n in node.outgoing_nodes:
                    _get_all_nodes(n, accum, accum_names)
                return accum, accum_names

            nodes = []
            node_names = []
            for origin_node in request_graph.origin_nodes:
                subtree_nodes, subtree_node_names = _get_all_nodes(origin_node, [], [])
                for st_node, st_node_name in zip(subtree_nodes, subtree_node_names):
                    if st_node_name not in node_names:
                        nodes.append(st_node)
                        node_names.append(st_node_name)

            tasks_to_get_endpoints = [
                node.get_endpoints(connection_pool) for node in nodes
            ]
            endpoints = await asyncio.gather(*tasks_to_get_endpoints)

            self._executor_endpoint_mapping = {}
            for node, (endp, _) in zip(nodes, endpoints):
                self._executor_endpoint_mapping[node.name] = endp.endpoints

        def _handle_request(request: 'Request') -> 'asyncio.Future':
            if self._summary:
                self.request_init_time[request.request_id] = time.time()

            # important that the gateway needs to have an instance of the graph per request
            request_graph = copy.deepcopy(graph)

            if graph.has_filter_conditions:
                # used to maintain order of docs that are filtered by executors
                request_doc_ids = request.data.docs[:, 'id']

            tasks_to_respond = []
            tasks_to_ignore = []
            endpoint = request.header.exec_endpoint
            r = request.routes.add()
            r.executor = 'gateway'
            r.start_time.GetCurrentTime()

            # If the request is targeting a specific deployment, we can send directly to the deployment instead of
            # querying the graph
            for origin_node in request_graph.origin_nodes:
                leaf_tasks = origin_node.get_leaf_tasks(
                    connection_pool,
                    request,
                    None,
                    endpoint=endpoint,
                    executor_endpoint_mapping=self._executor_endpoint_mapping,
                    target_executor_pattern=request.header.target_executor,
                )
                # Every origin node returns a set of tasks that are the ones corresponding to the leafs of each of their
                # subtrees that unwrap all the previous tasks. It starts like a chain of waiting for tasks from previous
                # nodes
                tasks_to_respond.extend([task for ret, task in leaf_tasks if ret])
                tasks_to_ignore.extend([task for ret, task in leaf_tasks if not ret])

            def _sort_response_docs(response):
                # sort response docs according to their order in the initial request
                def sort_by_request_order(doc):
                    if doc.id in request_doc_ids:
                        return request_doc_ids.index(doc.id)
                    else:
                        return len(request_doc_ids)  # put new/unknown docs at the end

                sorted_docs = sorted(response.data.docs, key=sort_by_request_order)
                response.data.docs = DocumentArray(sorted_docs)

            async def _process_results_at_end_gateway(
                tasks: List[asyncio.Task], request_graph: TopologyGraph
            ) -> asyncio.Future:
                if self._executor_endpoint_mapping is None:
                    await asyncio.gather(gather_endpoints(request_graph))

                partial_responses = await asyncio.gather(*tasks)
                partial_responses, metadatas = zip(*partial_responses)
                filtered_partial_responses = list(
                    filter(lambda x: x is not None, partial_responses))

                response = filtered_partial_responses[0]
                request_graph.add_routes(response)

                if graph.has_filter_conditions:
                    _sort_response_docs(response)

                return response

            # In case of empty topologies
            if not tasks_to_respond:
                r.end_time.GetCurrentTime()
                future = asyncio.Future()
                future.set_result((request, {}))
                tasks_to_respond.append(future)

            return asyncio.ensure_future(
                _process_results_at_end_gateway(tasks_to_respond, request_graph))

        return _handle_request

    def handle_result(self) -> Callable[['Request'], 'asyncio.Future']:
        """
        Function that handles the result when extracted from the request future

        :return: Return a Function that returns a request to be returned to the client
        """

        def _handle_result(result: 'Request'):
            """
            Function that handles the result when extracted from the request future

            :param result: The result returned to the gateway. It extracts the request to be returned to the client
            :return: Returns a request to be returned to the client
            """
            for route in result.routes:
                if route.executor == 'gateway':
                    route.end_time.GetCurrentTime()

            if self._summary:
                self._summary.observe(
                    time.time() - self.request_init_time[result.request_id])

            return result

        return _handle_result
    def do_GET(self):
        """
        Process GET request

        :return: Response with Prometheus metrics
        """
        # this will be used to return the total amount of time the request took
        start_time = time.time()
        # Create a metric to track time spent and requests made.
        request_time = Summary(self.P + 'request_processing_seconds',
                               'Time spent processing request',
                               registry=self.registry)

        # get parameters from the URL
        url = urlparse(self.path)
        # following boolean will be passed to True if an error is detected during the argument parsing
        error_detected = False
        query_components = parse_qs(urlparse(self.path).query)

        ilo_host = None
        ilo_port = None
        ilo_user = None
        ilo_password = None
        try:
            ilo_host = query_components.get('ilo_host', [''])[0] or os.environ['ilo_host']
            ilo_user = query_components.get('ilo_user', [''])[0] or os.environ['ilo_user']
            ilo_password = query_components.get(
                'ilo_password', [''])[0] or os.environ['ilo_password']
        except KeyError as e:
            print_err("missing parameter %s" % e)
            self.return_error()
            error_detected = True

        try:
            ilo_port = int(
                query_components.get('ilo_port', [''])[0] or os.environ['ilo_port'])
        except KeyError as e:
            ilo_port = 443

        if url.path == self.server.endpoint and ilo_host and ilo_user and ilo_password and ilo_port:
            ilo = None
            ssl_context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            # Sadly, ancient iLO's aren't dead yet, so let's enable sslv3 by default
            ssl_context.options &= ~ssl.OP_NO_SSLv3
            ssl_context.check_hostname = False
            ssl_context.set_ciphers((
                'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:'
                'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:!aNULL:'
                '!eNULL:!MD5'))

            try:
                ilo = hpilo.Ilo(hostname=ilo_host,
                                login=ilo_user,
                                password=ilo_password,
                                port=ilo_port,
                                timeout=10,
                                ssl_context=ssl_context)
            except hpilo.IloLoginFailed:
                print("ILO login failed")
                self.return_error()
                return
            except gaierror:
                print("ILO invalid address or port")
                self.return_error()
                return
            except hpilo.IloCommunicationError as e:
                print(e)
                self.return_error()
                return

            # get product and server name
            try:
                self.product_name = ilo.get_product_name()
            except:
                self.product_name = "Unknown HP Server"
            try:
                self.server_name = ilo.get_server_name()
                if self.server_name == "":
                    self.server_name = ilo_host
            except:
                self.server_name = ilo_host

            # get health, mod by n27051538
            self.embedded_health = ilo.get_embedded_health()
            self.watch_health_at_glance()
            self.watch_disks()
            self.watch_temperature()
            self.watch_fan()
            self.watch_ps()

            try:
                running = ilo.get_host_power_status()
                self.gauges['running'].labels(
                    product_name=self.product_name,
                    server_name=self.server_name).set(translate(running))
            except:
                pass

            # for iLO3 patch network
            if ilo.get_fw_version()["management_processor"] == 'iLO3':
                print_err('Unknown iLO nic status')
            else:
                # get nic information
                for nic_name, nic in self.embedded_health['nic_information'].items():
                    try:
                        value = ['OK', 'Disabled', 'Unknown', 'Link Down'].index(nic['status'])
                    except ValueError:
                        value = 4
                        print_err('unrecognised nic status: {}'.format(nic['status']))
                    self.gauges['nic_status'].labels(
                        product_name=self.product_name,
                        server_name=self.server_name,
                        nic_name=nic_name,
                        ip_address=nic['ip_address']).set(value)

            # get firmware version
            try:
                fw_version = ilo.get_fw_version()["firmware_version"]
                self.gauges['firmware_version'].labels(
                    product_name=self.product_name,
                    server_name=self.server_name).set(fw_version)
            except:
                pass

            try:
                oa_info = ilo.get_oa_info()
                self.gauges['oa_info'].labels(
                    product_name=self.product_name,
                    server_name=self.server_name,
                    oa_ip=oa_info.get('ipaddress', ''),
                    encl=oa_info.get('encl', ''),
                    location_bay=oa_info.get('location', ''),
                ).set(0)
            except:
                pass

            # get the amount of time the request took
            request_time.observe(time.time() - start_time)

            # generate and publish metrics
            metrics = generate_latest(self.registry)
            process_metrics = generate_latest(self.process_registry)
            self.send_response(200)
            self.send_header('Content-Type', 'text/plain')
            self.end_headers()
            self.wfile.write(metrics)
            self.wfile.write(process_metrics)
        elif url.path == '/':
            self.send_response(200)
            self.send_header('Content-Type', 'text/html')
            self.end_headers()
            self.wfile.write("""<html>
            <head><title>HP iLO Exporter</title></head>
            <body>
            <h1>HP iLO Exporter</h1>
            <p>Visit <a href="/metrics">Metrics</a> to use.</p>
            </body>
            </html>""")
        else:
            if not error_detected:
                self.send_response(404)
                self.end_headers()