def test_summary(self):
    """A labelled Summary is rendered as ``_count`` and ``_sum`` samples."""
    summary = Summary("ss", "A summary", ["a", "b"], registry=self.registry)
    child = summary.labels("c", "d")
    child.observe(17)

    # One observation of 17 -> count 1.0, sum 17.0 for the (c, d) label pair.
    expected = (
        b"# HELP ss A summary\n"
        b"# TYPE ss summary\n"
        b'ss_count{a="c",b="d"} 1.0\n'
        b'ss_sum{a="c",b="d"} 17.0\n'
    )
    self.assertEqual(expected, generate_latest(self.registry))
def parse_filled_rows(
    filled_rows: Dict[str, List[Any]],
    scenario_name: str,
    client_versions: List[Dict[str, str]],
    target_file: str,
) -> None:
    """Record task durations in a Summary and write them to a text file.

    Every entry in ``filled_rows["csv_rows"]`` is observed on a
    ``scenario_player_task_duration`` summary held in a private registry,
    which is then written to ``target_file`` with ``write_to_textfile``.

    :param filled_rows: mapping whose ``"csv_rows"`` entry holds the rows;
        each row is read through its ``task_type``, ``nodes_involved`` and
        ``duration`` attributes
    :param scenario_name: value of the ``scenario`` label
    :param client_versions: one dict per client with ``"implementation"``
        and ``"version"`` keys; distinct values are joined with ``/``
    :param target_file: path handed to ``write_to_textfile``
    """
    # Sort the distinct values before joining: iterating a bare set of
    # strings has no stable order, which would make the label value (and
    # therefore the time series identity) differ from run to run.
    implementation = "/".join(
        sorted({client["implementation"] for client in client_versions})
    )
    version = "/".join(sorted({client["version"] for client in client_versions}))

    registry = CollectorRegistry()
    summary = Summary(
        "task_duration",
        "The duration summary of a certain task",
        labelnames=[
            "scenario",
            "task",
            "nodes_involved",
            "implementation",
            "version",
        ],
        unit="sec",
        registry=registry,
        namespace="scenario_player",
    )

    def task_type_of(row):
        return row.task_type

    # groupby only merges adjacent items, so the rows are sorted by the
    # same key first.
    rows = sorted(filled_rows["csv_rows"], key=task_type_of)
    for task_type, entries in groupby(rows, key=task_type_of):
        # Keep only the part of the task type before the first "(".
        task = task_type.split("(", 1)[0]
        for entry in entries:
            summary.labels(
                scenario=scenario_name,
                task=task,
                nodes_involved=entry.nodes_involved,
                implementation=implementation,
                version=version,
            ).observe(entry.duration)

    write_to_textfile(target_file, registry)
def start_http_server(config_path, port, address=''):
    """
    Start a HTTP API server for FreeSWITCH prometheus collector.

    The YAML file at ``config_path`` is parsed with ``yaml.safe_load``; the
    resulting configuration, together with the duration and error metrics,
    is handed to ``FreeswitchExporterApplication``, which is then served via
    ``run_simple`` on ``address``/``port``.
    """
    collection_time = Summary(
        'freeswitch_collection_duration_seconds',
        'Duration of collections by the FreeSWITCH exporter',
        labelnames=['module'],
    )
    request_errors = Counter(
        'freeswitch_request_errors_total',
        'Errors in requests to FreeSWITCH exporter',
        labelnames=['module'],
    )

    # Load configuration.
    with open(config_path) as config_file:
        settings = yaml.safe_load(config_file)

    # Initialize metrics: one labelled child per configured module.
    for module_name in settings.keys():
        # pylint: disable=no-member
        request_errors.labels(module_name)
        # pylint: disable=no-member
        collection_time.labels(module_name)

    exporter = FreeswitchExporterApplication(
        settings, collection_time, request_errors
    )
    run_simple(address, port, exporter, threaded=True)
def start_http_server(config, port, address=''):
    """
    Start a HTTP API server for Proxmox VE prometheus collector.

    ``config`` is iterated through ``.keys()`` to obtain the module names and
    is passed unchanged to ``PveExporterApplication``, which is then served
    via ``run_simple`` on ``address``/``port``.
    """
    collection_time = Summary(
        'pve_collection_duration_seconds',
        'Duration of collections by the PVE exporter',
        labelnames=['module'],
    )
    request_errors = Counter(
        'pve_request_errors_total',
        'Errors in requests to PVE exporter',
        labelnames=['module'],
    )

    # Initialize metrics: one labelled child per configured module.
    for module_name in config.keys():
        # pylint: disable=no-member
        request_errors.labels(module_name)
        # pylint: disable=no-member
        collection_time.labels(module_name)

    exporter = PveExporterApplication(config, collection_time, request_errors)
    run_simple(address, port, exporter, threaded=True)
def test_summary(self):
    """Test that we can track summaries in Service303"""
    # Register a labelled summary and record one observation per label value
    tracked = Summary('process_max_fds', 'A summary', ['result'],
                      registry=self.registry)
    tracked.labels('success').observe(1.23)
    tracked.labels('failure').observe(2.34)

    # Build the proto metrics we expect to be exported
    expected_success = metrics_pb2.Metric(
        summary=metrics_pb2.Summary(sample_count=1, sample_sum=1.23),
        timestamp_ms=1234000,
    )
    expected_failure = metrics_pb2.Metric(
        summary=metrics_pb2.Summary(sample_count=1, sample_sum=2.34),
        timestamp_ms=1234000,
    )
    expected_family = metrics_pb2.MetricFamily(
        name=str(metricsd_pb2.process_max_fds),
        type=metrics_pb2.SUMMARY,
    )
    expected_success.label.add(name=str(metricsd_pb2.result), value='success')
    expected_failure.label.add(name=str(metricsd_pb2.result), value='failure')
    expected_family.metric.extend([expected_success, expected_failure])

    # time.time is pinned to 1234 to match timestamp_ms=1234000 above
    with unittest.mock.patch('time.time') as mock_time:
        mock_time.side_effect = lambda: 1234
        exported = list(metrics_export.get_metrics(self.registry))
        self.assertCountEqual(exported[0].metric, expected_family.metric)
class MetaBanditMonitor(object):
    """Prometheus metrics for a service: an oracle-metric Summary and a
    selected-arm Counter, both labelled by endpoint and service version."""

    def __init__(self, bento_service: BentoService, config: Config) -> None:
        """Create the two metrics, named after ``bento_service.name``.

        ``config`` is accepted but not read here; the namespace comes from
        ``bentoml_config("instrument")``.
        """
        self._version = bento_service.version
        prefix = bento_service.name
        metrics_namespace = bentoml_config("instrument").get("default_namespace")
        shared_labels = ["endpoint", "service_version"]

        self._metric = Summary(
            prefix + "_oracle_metric",
            " Oracle Metric",
            labelnames=shared_labels,
            namespace=metrics_namespace,
        )
        self._selected_arm = Counter(
            prefix + "_arm_total",
            'Total number selected arm',
            labelnames=shared_labels + ["arm"],
            namespace=metrics_namespace,
        )

    def observe_metric_value(self, value: float, endpoint: str = _DEFAULT_ENDPOINT):
        """Record ``value`` on the oracle-metric summary for ``endpoint``."""
        child = self._metric.labels(endpoint, self._version)
        child.observe(value)

    def observe_selected_arm(self, arm: str, endpoint: str = _DEFAULT_ENDPOINT):
        """Increment the selection counter of ``arm`` for ``endpoint``."""
        child = self._selected_arm.labels(endpoint, self._version, arm)
        child.inc()
def test_summary(self):
    """A labelled Summary is exported as count, sum and created samples in JSON."""
    summary = Summary('ss', 'A summary', ['a', 'b'], registry=self.registry)
    summary.labels('c', 'd').observe(17)

    # Compare parsed documents so key order and whitespace do not matter.
    expected = json.loads("""{"ss": {"samples": [{"sample_name": "ss_count", "labels": {"a": "c", "b": "d"}, "value": "1.0", "timestamp": null, "exemplar": {}}, {"sample_name": "ss_sum", "labels": {"a": "c", "b": "d"}, "value": "17.0", "timestamp": null, "exemplar": {}}, {"sample_name": "ss_created", "labels": {"a": "c", "b": "d"}, "value": "123.456", "timestamp": null, "exemplar": {}}], "help": "A summary", "type": "summary"}}""")
    actual = json.loads(self.json_exporter.generate_latest_json())
    self.assertEqual(expected, actual)
def test_summary(self):
    """A labelled Summary is rendered with count, sum and created samples,
    followed by the ``# EOF`` terminator."""
    summary = Summary('ss', 'A summary', ['a', 'b'], registry=self.registry)
    summary.labels('c', 'd').observe(17)

    expected = (
        b'# HELP ss A summary\n'
        b'# TYPE ss summary\n'
        b'ss_count{a="c",b="d"} 1.0\n'
        b'ss_sum{a="c",b="d"} 17.0\n'
        b'ss_created{a="c",b="d"} 123.456\n'
        b'# EOF\n'
    )
    self.assertEqual(expected, generate_latest(self.registry))
def run(self):
    """Gather metrics forever, sleeping ``gathering_interval`` between runs.

    Creates the run/error counters and the timing summaries, optionally
    loads device names from the CCU, then loops: every
    ``reload_names_interval``-th iteration (when name reloading is active)
    the names are re-read, and on every iteration ``generate_metrics`` is
    called. Failures are logged and counted, never propagated, so one bad
    run does not stop the loop. ``SystemExit`` and ``KeyboardInterrupt`` are
    deliberately not swallowed so the thread can still be shut down.
    """
    logging.info("Starting thread for data gathering")
    logging.info("Mapping {} devices with custom names".format(len(self.mapped_names)))
    logging.info("Supporting {} device types: {}".format(
        len(self.supported_device_types), ",".join(self.supported_device_types)))

    gathering_counter = Counter(
        'gathering_count', 'Amount of gathering runs',
        labelnames=['ccu'], namespace=self.METRICS_NAMESPACE)
    error_counter = Counter(
        'gathering_errors', 'Amount of failed gathering runs',
        labelnames=['ccu'], namespace=self.METRICS_NAMESPACE)
    generate_metrics_summary = Summary(
        'generate_metrics_seconds', 'Time spent in gathering runs',
        labelnames=['ccu'], namespace=self.METRICS_NAMESPACE)
    read_names_summary = Summary(
        'read_names_seconds', 'Time spent reading names from CCU',
        labelnames=['ccu'], namespace=self.METRICS_NAMESPACE)

    gathering_loop_counter = 1

    if not self.mapped_names:
        # if no custom mapped names are given we use them from the ccu.
        self.reload_names_active = True
        with read_names_summary.labels(self.ccu_host).time():
            self.mapped_names = self.read_mapped_names()
        logging.info("Read {} device names from CCU".format(len(self.mapped_names)))

    while True:
        if self.reload_names_active and gathering_loop_counter % self.reload_names_interval == 0:
            try:
                with read_names_summary.labels(self.ccu_host).time():
                    self.mapped_names = self.read_mapped_names()
            except OSError as os_error:
                logging.info("Failed to read device names: {0}".format(os_error))
                error_counter.labels(self.ccu_host).inc()
            except Exception:
                logging.info("Failed to read device names: {0}".format(sys.exc_info()))
                error_counter.labels(self.ccu_host).inc()
            else:
                # Only report a successful read; after a failure the old
                # names are still in place and nothing was read.
                logging.info("Read {} device names from CCU".format(len(self.mapped_names)))

        gathering_counter.labels(self.ccu_host).inc()
        try:
            with generate_metrics_summary.labels(self.ccu_host).time():
                self.generate_metrics()
        except OSError as os_error:
            logging.info("Failed to generate metrics: {0}".format(os_error))
            error_counter.labels(self.ccu_host).inc()
        except Exception:
            logging.info("Failed to generate metrics: {0}".format(sys.exc_info()))
            error_counter.labels(self.ccu_host).inc()
        finally:
            time.sleep(self.gathering_interval)
        gathering_loop_counter += 1
def run(self):
    """Gather metrics forever, sleeping ``gathering_interval`` between runs.

    Creates the run/error counters and the timing summary, then calls
    ``generate_metrics`` in an endless loop. Failures are logged and
    counted, never propagated, so one bad run does not stop the loop.
    ``SystemExit`` and ``KeyboardInterrupt`` are deliberately not swallowed
    so the thread can still be shut down.
    """
    logging.info("Starting thread for data gathering")
    logging.info("Mapping {} devices with custom names".format(len(self.mapped_names)))
    logging.info("Supporting {} device types: {}".format(
        len(self.supported_device_types), ",".join(self.supported_device_types)))

    gathering_counter = Counter(
        'gathering_count', 'Amount of gathering runs',
        labelnames=['ccu'], namespace=self.METRICS_NAMESPACE)
    error_counter = Counter(
        'gathering_errors', 'Amount of failed gathering runs',
        labelnames=['ccu'], namespace=self.METRICS_NAMESPACE)
    generate_metrics_summary = Summary(
        'generate_metrics_seconds', 'Time spent in gathering runs',
        labelnames=['ccu'], namespace=self.METRICS_NAMESPACE)

    while True:
        gathering_counter.labels(self.ccu_host).inc()
        try:
            with generate_metrics_summary.labels(self.ccu_host).time():
                self.generate_metrics()
        except OSError as os_error:
            logging.info("Failed to generate metrics: {0}".format(os_error))
            error_counter.labels(self.ccu_host).inc()
        except Exception:
            # A bare ``except:`` would also trap SystemExit and
            # KeyboardInterrupt and make the loop impossible to stop.
            logging.info("Failed to generate metrics: {0}".format(sys.exc_info()))
            error_counter.labels(self.ccu_host).inc()
        finally:
            time.sleep(self.gathering_interval)
def test_summary(self):
    """One observation on a labelled Summary yields count 1.0 and sum 17.0."""
    metric = Summary('ss', 'A summary', ['a', 'b'], registry=self.registry)
    metric.labels('c', 'd').observe(17)

    expected_lines = [
        b'# HELP ss A summary',
        b'# TYPE ss summary',
        b'ss_count{a="c",b="d"} 1.0',
        b'ss_sum{a="c",b="d"} 17.0',
    ]
    # Every line, including the last, is newline-terminated.
    expected = b'\n'.join(expected_lines) + b'\n'
    self.assertEqual(expected, generate_latest(self.registry))
# Handler timings.
START_TIME = Summary('start_processing_seconds',
                     'Time spent in the /start handler')
INSPIRE_TIME = Summary('inspire_processing_seconds',
                       'Time spent in the /inspire handler')
INLINE_TIME = Summary('inline_processing_seconds',
                      'Time spent in the inline query handler')
CHOSEN_INLINE_RESULTS = Counter(
    'chosen_inline_results',
    'Amount of inline results that were chosen by a user')

# One summary shared by all regular-interval workers, split by the ``name``
# label; the constants below are the pre-bound children for each worker.
REGULAR_INTERVAL_WORKER_TIME = Summary(
    'regular_interval_worker_processing_seconds',
    # The help text previously read "...of this workercrawler run cycle",
    # a leftover from a half-finished edit.
    'Time spent for a single run cycle of this worker',
    ['name'])
CRAWLER_TIME = REGULAR_INTERVAL_WORKER_TIME.labels(name="crawler")
UPLOADER_TIME = REGULAR_INTERVAL_WORKER_TIME.labels(name="uploader")
ANALYSER_TIME = REGULAR_INTERVAL_WORKER_TIME.labels(name="analyser")

UPLOADER_QUEUE_LENGTH = Gauge(
    'uploader_queue_length',
    'Number of entity ids in the uploader worker queue')

# Text-detection timing, split by the analysis backend in the ``name`` label.
ANALYSER_FIND_TEXT_TIME = Summary('analyser_find_text_processing_seconds',
                                  'Time spent to find text for a given image',
                                  ['name'])
GOOGLE_VISION_FIND_TEXT_TIME = ANALYSER_FIND_TEXT_TIME.labels(
    name=IMAGE_ANALYSIS_TYPE_GOOGLE_VISION)
MICROSOFT_AZURE_FIND_TEXT_TIME = ANALYSER_FIND_TEXT_TIME.labels(
    name=IMAGE_ANALYSIS_TYPE_AZURE)
class DataRequestHandler:
    """Object to encapsulate the code related to handle the data requests passing to executor and its returned values"""

    def __init__(
        self,
        args: 'argparse.Namespace',
        logger: 'JinaLogger',
        metrics_registry: Optional['CollectorRegistry'] = None,
        **kwargs,
    ):
        """Initialize private parameters and execute private loading functions.

        :param args: args from CLI
        :param logger: the logger provided by the user
        :param metrics_registry: optional metrics registry for prometheus used if we need to expose metrics from the executor or from the data request handler
        :param kwargs: extra keyword arguments
        """
        super().__init__()
        self.args = args
        self.args.parallel = self.args.shards
        self.logger = logger
        self._is_closed = False
        self._load_executor(metrics_registry)
        self._init_monitoring(metrics_registry)

    def _init_monitoring(self, metrics_registry: Optional['CollectorRegistry'] = None):
        """Create the prometheus metrics, or disable monitoring.

        :param metrics_registry: registry the metrics are attached to; when it is falsy both metric attributes are set to ``None``
        """
        if metrics_registry:
            with ImportExtensions(
                required=True,
                help_text='You need to install the `prometheus_client` to use the montitoring functionality of jina',
            ):
                from prometheus_client import Counter, Summary

            self._counter = Counter(
                'document_processed',
                'Number of Documents that have been processed by the executor',
                namespace='jina',
                labelnames=('executor_endpoint', 'executor', 'runtime_name'),
                registry=metrics_registry,
            )
            self._request_size_metrics = Summary(
                'request_size_bytes',
                'The request size in Bytes',
                namespace='jina',
                labelnames=('executor_endpoint', 'executor', 'runtime_name'),
                registry=metrics_registry,
            )
        else:
            self._counter = None
            self._request_size_metrics = None

    def _load_executor(self, metrics_registry: Optional['CollectorRegistry'] = None):
        """
        Load the executor to this runtime, specified by ``uses`` CLI argument.

        :param metrics_registry: Optional prometheus metrics registry that will be passed to the executor so that it can expose metrics
        """
        try:
            self._executor: BaseExecutor = BaseExecutor.load_config(
                self.args.uses,
                uses_with=self.args.uses_with,
                uses_metas=self.args.uses_metas,
                uses_requests=self.args.uses_requests,
                runtime_args={  # these are not parsed to the yaml config file but are pass directly during init
                    'workspace': self.args.workspace,
                    'shard_id': self.args.shard_id,
                    'shards': self.args.shards,
                    'replicas': self.args.replicas,
                    'name': self.args.name,
                    'metrics_registry': metrics_registry,
                },
                py_modules=self.args.py_modules,
                extra_search_paths=self.args.extra_search_paths,
            )
            self.logger.debug(f'{self._executor} is successfully loaded!')
        except BadConfigSource:
            self.logger.error(
                f'fail to load config from {self.args.uses}, if you are using docker image for --uses, '
                f'please use `docker://YOUR_IMAGE_NAME`'
            )
            raise
        except FileNotFoundError:
            self.logger.error(f'fail to load file dependency')
            raise
        except Exception:
            self.logger.critical(f'can not load the executor from {self.args.uses}')
            raise

    @staticmethod
    def _parse_params(parameters: Dict, executor_name: str):
        """Overlay the executor-specific parameters on the general ones.

        :param parameters: request parameters; the value stored under ``executor_name``, if truthy, is a mapping of overrides
        :param executor_name: key under which the overrides are stored
        :return: a new dict; ``parameters`` itself is left untouched
        """
        # Work on a copy so the caller's dict is not modified as a side effect.
        parsed_params = dict(parameters)
        specific_parameters = parameters.get(executor_name, None)
        if specific_parameters:
            parsed_params.update(specific_parameters)
        return parsed_params

    async def handle(self, requests: List['DataRequest']) -> 'DataRequest':
        """Run the executor on the given requests and write the result into the first one.

        If the endpoint of the first request is not served by the executor (and there is no default endpoint) the first request is returned unchanged.

        :param requests: The messages to handle containing a DataRequest
        :returns: the processed message
        :raises TypeError: if the executor returns something other than a DocumentArray, a dict or ``None``
        """
        exec_endpoint = requests[0].header.exec_endpoint

        # skip executor if endpoints mismatch
        if (
            exec_endpoint not in self._executor.requests
            and __default_endpoint__ not in self._executor.requests
        ):
            self.logger.debug(
                f'skip executor: mismatch request, exec_endpoint: {requests[0].header.exec_endpoint}, requests: {self._executor.requests}'
            )
            return requests[0]

        if self._request_size_metrics:
            for req in requests:
                self._request_size_metrics.labels(
                    exec_endpoint,
                    self._executor.__class__.__name__,
                    self.args.name,
                ).observe(req.nbytes)

        params = self._parse_params(requests[0].parameters, self._executor.metas.name)
        docs = DataRequestHandler.get_docs_from_request(
            requests,
            field='docs',
        )

        # executor logic
        return_data = await self._executor.__acall__(
            req_endpoint=exec_endpoint,
            docs=docs,
            parameters=params,
            docs_matrix=DataRequestHandler.get_docs_matrix_from_request(
                requests,
                field='docs',
            ),
        )

        # assigning result back to request
        if return_data is not None:
            if isinstance(return_data, DocumentArray):
                docs = return_data
            elif isinstance(return_data, dict):
                # A dict result is stored under '__results__', keyed by this
                # runtime's name, in the request parameters.
                params = requests[0].parameters
                results_key = '__results__'
                if results_key not in params:
                    params[results_key] = dict()
                params[results_key].update({self.args.name: return_data})
                requests[0].parameters = params
            else:
                raise TypeError(
                    f'The return type must be DocumentArray / Dict / `None`, '
                    f'but getting {return_data!r}'
                )

        if self._counter:
            self._counter.labels(
                exec_endpoint,
                self._executor.__class__.__name__,
                self.args.name,
            ).inc(len(docs))

        DataRequestHandler.replace_docs(requests[0], docs, self.args.output_array_type)

        return requests[0]

    @staticmethod
    def replace_docs(
        request: 'DataRequest',
        docs: 'DocumentArray',
        ndarrray_type: Optional[str] = None,
    ) -> None:
        """Replaces the docs in a message with new Documents.

        :param request: The request object
        :param docs: the new docs to be used
        :param ndarrray_type: type tensor and embedding will be converted to
        """
        request.data.set_docs_convert_arrays(docs, ndarray_type=ndarrray_type)

    @staticmethod
    def replace_parameters(request: 'DataRequest', parameters: Dict) -> None:
        """Replaces the parameters in a message with new parameters.

        :param request: The request object
        :param parameters: the new parameters to be used
        """
        request.parameters = parameters

    @staticmethod
    def merge_routes(requests: List['DataRequest']) -> None:
        """Merges all routes found in requests into the first message

        :param requests: The messages containing the requests with the routes to merge
        """
        if len(requests) <= 1:
            return
        # Track executors already present so each one is appended at most once.
        existing_executor_routes = {r.executor for r in requests[0].routes}
        for request in requests[1:]:
            for route in request.routes:
                if route.executor not in existing_executor_routes:
                    requests[0].routes.append(route)
                    existing_executor_routes.add(route.executor)

    def close(self):
        """Close the data request handler, by closing the executor"""
        if not self._is_closed:
            self._executor.close()
            self._is_closed = True

    @staticmethod
    def get_docs_matrix_from_request(
        requests: List['DataRequest'],
        field: str,
    ) -> Optional[List['DocumentArray']]:
        """
        Returns a docs matrix from a list of DataRequest objects.

        :param requests: List of DataRequest objects
        :param field: field to be retrieved
        :return: docs matrix: list of DocumentArray objects, or ``None`` when every entry is empty
        """
        result = [getattr(request, field) for request in requests]

        # to unify all length=0 DocumentArray (or any other results) will simply considered as None
        # otherwise, the executor has to handle [None, None, None] or [DocArray(0), DocArray(0), DocArray(0)]
        if sum(len(r) for r in result):
            return result
        return None

    @staticmethod
    def get_parameters_dict_from_request(
        requests: List['DataRequest'],
    ) -> 'Dict':
        """
        Returns a parameters dict from a list of DataRequest objects.

        :param requests: List of DataRequest objects
        :return: the parameters of the first request, with the ``__results__`` entries of all requests merged in
        """
        key_result = '__results__'
        parameters = requests[0].parameters
        if key_result not in parameters:
            parameters[key_result] = dict()
        # we only merge the results and make the assumption that the others params does not change during execution
        for req in requests:
            parameters[key_result].update(req.parameters.get(key_result, dict()))

        return parameters

    @staticmethod
    def get_docs_from_request(
        requests: List['DataRequest'],
        field: str,
    ) -> 'DocumentArray':
        """
        Gets a field from the message

        :param requests: requests to get the field from
        :param field: field name to access

        :returns: DocumentArray extracted from the field from all messages
        """
        if len(requests) > 1:
            # Documents are collected starting from the last request.
            result = DocumentArray(
                [d for r in reversed(requests) for d in getattr(r, field)]
            )
        else:
            result = getattr(requests[0], field)

        return result

    @staticmethod
    def reduce(docs_matrix: List['DocumentArray']) -> Optional['DocumentArray']:
        """
        Reduces a list of DocumentArrays into one DocumentArray. Changes are applied to the first DocumentArray
        in-place.

        Reduction consists in reducing every DocumentArray in `docs_matrix` sequentially using
        :class:`DocumentArray`.:method:`reduce`.
        The resulting DocumentArray contains Documents of all DocumentArrays.
        If a Document exists in many DocumentArrays, data properties are merged with priority to the left-most
        DocumentArrays (that is, if a data attribute is set in a Document belonging to many DocumentArrays, the
        attribute value of the left-most DocumentArray is kept).
        Matches and chunks of a Document belonging to many DocumentArrays are also reduced in the same way.
        Other non-data properties are ignored.

        .. note::
            - Matches are not kept in a sorted order when they are reduced. You might want to re-sort them in a later
                step.
            - The final result depends on the order of DocumentArrays when applying reduction.

        :param docs_matrix: List of DocumentArrays to be reduced
        :return: the resulting DocumentArray, or ``None`` when ``docs_matrix`` is empty or ``None``
        """
        if docs_matrix:
            da = docs_matrix[0]
            da.reduce_all(docs_matrix[1:])
            return da
        return None

    @staticmethod
    def reduce_requests(requests: List['DataRequest']) -> 'DataRequest':
        """
        Reduces a list of requests containing DocumentArrays into one request object. Changes are applied to the first
        request object in-place.

        Reduction consists in reducing every DocumentArray in `requests` sequentially using
        :class:`DocumentArray`.:method:`reduce`.
        The resulting DataRequest object contains Documents of all DocumentArrays inside requests.

        :param requests: List of DataRequest objects
        :return: the resulting DataRequest
        """
        docs_matrix = DataRequestHandler.get_docs_matrix_from_request(
            requests, field='docs'
        )

        # Reduction is applied in-place to the first DocumentArray in the matrix
        da = DataRequestHandler.reduce(docs_matrix)
        DataRequestHandler.replace_docs(requests[0], da)

        params = DataRequestHandler.get_parameters_dict_from_request(requests)
        DataRequestHandler.replace_parameters(requests[0], params)

        return requests[0]
from prometheus_client import Summary, Gauge
from prometheus_client.metrics import MetricWrapperBase

from grocy_telegram_bot.const import *

# One summary for all command handlers, split by the ``command`` label.
COMMAND_TIME = Summary('command_processing_seconds', 'Time spent in a command handler', ['command'])
# Pre-bound children, one per command constant.
COMMAND_TIME_START = COMMAND_TIME.labels(command=COMMAND_START)
COMMAND_TIME_CHORES = COMMAND_TIME.labels(command=COMMAND_CHORES)
COMMAND_TIME_INVENTORY = COMMAND_TIME.labels(command=COMMAND_INVENTORY)
COMMAND_TIME_SHOPPING = COMMAND_TIME.labels(command=COMMAND_SHOPPING)
COMMAND_TIME_SHOPPING_LIST = COMMAND_TIME.labels(command=COMMAND_SHOPPING_LIST)
COMMAND_TIME_SHOPPING_LIST_ADD = COMMAND_TIME.labels(command=COMMAND_SHOPPING_LIST_ADD)

# Inventory gauges; the labelled ones carry one child per product name.
PRODUCT_INVENTORY_COUNT = Gauge(
    'product_inventory_count',
    'Number of inventory items per product name',
    ['product_name']
)
EXPIRED_PRODUCTS_COUNT = Gauge(
    'expired_products_count',
    'Number of expired products in inventory'
)
PRODUCTS_BELOW_MINIMUM_STOCK_COUNT = Gauge(
    'products_below_minimum_stock_count',
    'Number of items a product is below its minimum stock',
    ['product_name']
)
CHORES_COUNT = Gauge(
class PrometheusMonitor(object):
    """Translate celery task/worker events into prometheus metrics.

    Metrics are created in the default registry (served by the HTTP server
    started in ``__init__``) and are additionally registered in one
    dedicated ``CollectorRegistry`` each, which is what gets pushed to the
    pushgateway when the corresponding event arrives.
    """

    def __init__(self, app, broker, interval=1):
        """Start the metrics HTTP server and set up state and metrics.

        :param app: celery application; ``app.events.State()`` tracks tasks and workers
        :param broker: broker URL passed to ``BrokerConnection``
        :param interval: currently unused
        """
        start_http_server(8000)
        self.app = app
        self.state = app.events.State()
        self.broker_conn = BrokerConnection(broker)
        self.gateway = 'localhost:9091'
        self.create_metric()

    @staticmethod
    def _push_registry_for(metric):
        """Return a fresh registry that contains ``metric``.

        Without the explicit ``register`` call the registry would be empty
        and pushing it would send no samples at all.
        """
        registry = CollectorRegistry()
        registry.register(metric)
        return registry

    def create_metric(self):
        """Create every metric together with its push registry (``*_c``)."""
        # record app conf
        self.conf_info = Info('celery_conf_info', 'APP_CONF')
        self.conf_info_c = self._push_registry_for(self.conf_info)

        # monitor worker info
        self.workers_info = Info('celery_workers_info', 'WORKER_INFO')
        self.workers_info_c = self._push_registry_for(self.workers_info)

        # monitor worker info real-time
        self.workers_state = Gauge('celery_workers_state', 'WORKER_STATE', ['worker'])
        self.workers_state_c = self._push_registry_for(self.workers_state)
        self.workers_processed = Gauge('celery_processed_tasks_total', 'WORKER_TASKS_PROCESSED', ['worker'])
        self.workers_processed_c = self._push_registry_for(self.workers_processed)
        self.workers_active = Gauge('celery_active_tasks_total', 'WORKER_TASKS_ACTIVE', ['worker'])
        self.workers_active_c = self._push_registry_for(self.workers_active)

        # monitor tasks info
        self.tasks_counter = Counter('celery_tasks_total', 'TASK_COUNT_INFO', ['worker', 'task', 'result'])
        self.tasks_counter_c = self._push_registry_for(self.tasks_counter)
        self.tasks_runtime = Summary('celery_tasks_seconds', 'TASK_RUNTIME', ['worker', 'task'])
        self.tasks_runtime_c = self._push_registry_for(self.tasks_runtime)
        self.tasks_info = Info('celery_tasks_info', 'TASK_INFO')
        self.tasks_info_c = self._push_registry_for(self.tasks_info)

    @staticmethod
    def auth_handler(url, method, timeout, headers, data):
        """Pushgateway handler using basic auth credentials from the config."""
        username = get_config_values('pushgateway', 'username')
        password = get_config_values('pushgateway', 'password')
        return basic_auth_handler(url, method, timeout, headers, data, username, password)

    def _push(self, registry):
        """Push one registry to the gateway under the ``pushgateway`` job.

        ``pushadd_to_gateway`` is used because all registries share the same
        job: a plain ``push_to_gateway`` replaces every metric of the group,
        so each push would wipe the metrics sent by the previous one.
        """
        from prometheus_client import pushadd_to_gateway

        pushadd_to_gateway(self.gateway, job='pushgateway', registry=registry,
                           handler=self.auth_handler)

    # monitor the task and status of worker with functions
    def run_loop(self):
        """Capture broker events forever and dispatch them to the handlers.

        Any exception raised while capturing propagates to the caller.
        """
        # self.on_application_conf()
        while True:
            with self.broker_conn as conn:
                recv = EventReceiver(conn, handlers={
                    'task-sent': self.on_task_sent,
                    'task-failed': self.on_task_failed,
                    'task-retried': self.on_task_retried,
                    'task-started': self.on_task_started,
                    'task-succeeded': self.on_task_succeeded,
                    'task-received': self.on_task_received,
                    'task-rejected': self.on_task_rejected,
                    'task-revoked': self.on_task_revoked,
                    'worker-online': self.on_worker_online,
                    'worker-heartbeat': self.on_worker_heartbeat,
                    'worker-offline': self.on_worker_offline,
                })
                recv.capture(limit=None, timeout=None, wakeup=True)

    # all about configuration
    def on_application_conf(self):  # TODO
        """Publish the app configuration as an Info metric, masking secrets."""
        conf = {}

        # get the password shielded
        for key in self.app.conf.keys():
            value = self.app.conf[key]
            if key.lower() in ['broker_url', 'celery_result_backend']:
                if isinstance(value, str):
                    uri = re.sub(r':.*?@', ':********@', value)
                    conf[key] = re.sub(r'@.*?:', '@hostname:', uri)
                else:
                    conf[key] = 'unknown'
            elif bool(re.search(r'password', key.lower())):
                # Info values are label values and must be strings, so an
                # unset password is reported like every other unset entry.
                conf[key] = '********' if value is not None else 'unknown'
            else:
                conf[key] = str(value) if value is not None else 'unknown'

        self.conf_info.info(conf)

    # all about the tasks
    def on_task_sent(self, event):  # TODO
        """Record the event in the state; no metrics yet."""
        self.state.event(event)

    def on_task_received(self, event):  # TODO
        """Record the event in the state; no metrics yet."""
        self.state.event(event)

    def on_task_started(self, event):
        """Record the event in the state and log the start."""
        self.state.event(event)
        task = self.state.tasks.get(event['uuid'])
        logger.info('Task {}[{}] started'.format(task.name, task.uuid))

    def on_task_succeeded(self, event):
        """Count the success, observe the runtime and push task metrics."""
        self.state.event(event)
        task = self.state.tasks.get(event['uuid'])
        logger.info('Task {}[{}] succeeded'.format(task.name, task.uuid))

        self.tasks_counter.labels(worker=task.hostname, task=task.name, result='succeeded').inc()
        self.tasks_runtime.labels(worker=task.hostname, task=task.name).observe(task.runtime)
        self.tasks_info.info({
            'name': task.name,
            'uuid': task.uuid,
            'result': 'succeeded',
            'runtime': str(task.runtime),
            'hostname': task.hostname,
            'timestamp': str(task.timestamp)
        })
        self._push(self.tasks_counter_c)
        self._push(self.tasks_runtime_c)
        self._push(self.tasks_info_c)

    def _record_unsuccessful_task(self, task, result):
        """Count and publish a task that ended with ``result``."""
        self.tasks_counter.labels(worker=task.hostname, task=task.name, result=result).inc()
        self.tasks_info.info({
            'name': task.name,
            'uuid': task.uuid,
            'result': result,
            # str() keeps the Info values strings even when unset.
            'exception': str(task.exception),
            'traceback': str(task.traceback),
            'hostname': task.hostname,
            'timestamp': str(task.timestamp)
        })
        self._push(self.tasks_counter_c)
        self._push(self.tasks_info_c)

    def on_task_failed(self, event):  # TODO
        """Count the failure and push task metrics."""
        self.state.event(event)
        task = self.state.tasks.get(event['uuid'])
        logger.warning('Task {}[{}] failed'.format(task.name, task.uuid))
        self._record_unsuccessful_task(task, 'failed')

    def on_task_retried(self, event):  # TODO
        """Count the retry and push task metrics."""
        self.state.event(event)
        task = self.state.tasks.get(event['uuid'])
        logger.warning('Task {}[{}] retried'.format(task.name, task.uuid))
        self._record_unsuccessful_task(task, 'retried')

    def on_task_rejected(self, event):  # TODO
        """Record the event in the state; no metrics yet."""
        self.state.event(event)

    def on_task_revoked(self, event):  # TODO
        """Record the event in the state; no metrics yet."""
        self.state.event(event)

    # all about the status of the workers
    def on_worker_online(self, event):  # TODO
        """Mark the worker as up (1) and publish its software info."""
        self.state.event(event)
        worker = self.state.workers.get(event['hostname'])

        self.workers_state.labels(worker=worker.hostname).set(1)
        self.workers_info.info({
            'hostname': worker.hostname,
            'sw_ident': worker.sw_ident,
            'sw_ver': worker.sw_ver,
            'sw_sys': worker.sw_sys
        })
        self._push(self.workers_state_c)
        self._push(self.workers_info_c)

    def on_worker_heartbeat(self, event):
        """Update processed/active gauges and mark the worker as up (1)."""
        self.state.event(event)
        worker = self.state.workers.get(event['hostname'])

        # Counters not reported yet are treated as zero.
        if worker.processed is None:
            worker.processed = 0
        if worker.active is None:
            worker.active = 0
        self.workers_processed.labels(worker=worker.hostname).set(worker.processed)
        self.workers_active.labels(worker=worker.hostname).set(worker.active)
        self._push(self.workers_processed_c)
        self._push(self.workers_active_c)

        self.workers_state.labels(worker=worker.hostname).set(1)
        self._push(self.workers_state_c)

    def on_worker_offline(self, event):  # TODO
        """Mark the worker as down (0)."""
        self.state.event(event)
        worker = self.state.workers.get(event['hostname'])

        self.workers_state.labels(worker=worker.hostname).set(0)
        self._push(self.workers_state_c)
from prometheus_client import start_http_server, Summary, Gauge
import random
import time

# Create a metric to track time spent for a function and count of requests made.
REQUEST_TIME = Summary('request_processing_seconds', 'Time spent processing request', ['method'])
# Child of REQUEST_TIME bound to the label value method='request1'.
request1 = REQUEST_TIME.labels(method='request1')


# Decorate function with metric.
@request1.time()
def process_request(t):
    """A dummy function that takes some time.

    Sleeps for ``t`` seconds; the decorator times each call.
    """
    # we could also execute an external task like fetching a url
    time.sleep(t)


# Additional metrics
CPU_TEMP = Gauge('cpu_temperature', 'Delivers the current temperature of the cpu', ['cpu', 'core'])
CPU_FANSPEED = Gauge('cpu_fanspeed', 'Delivers the rotation per minute of the cpu fan', ['cpu', 'core'])

if __name__ == '__main__':
    # Start up the server to expose the metrics.
    start_http_server(9100)
    # Generate some requests.
from flask import Flask, flash, render_template, redirect, url_for, request, session
from module.database import Database
from prometheus_client import start_http_server, Counter, Summary

# Visit counter, split by service and endpoint.
call_metric = Counter('opsschool_monitor_flask_main_count', 'Number of visits to main', ["service", "endpoint"])
# Request timing, split by the ``method`` label.
time_metric = Summary('opsschool_monitor_flask_request_processing_seconds', 'Time spent processing request', ["method"])

app = Flask(__name__)
# NOTE(review): the session secret is hard-coded in source; consider loading
# it from the environment or a secrets store.
app.secret_key = "mys3cr3tk3y"
db = Database()

# Child of time_metric bound to method="hello_world".
hello_world_timer = time_metric.labels(method="hello_world")


# NOTE(review): no @app.route decorator is visible on this function, so it
# does not appear to be reachable over HTTP from here; confirm.
@hello_world_timer.time()
def hello_world():
    """Increment the main-endpoint visit counter and return a greeting."""
    call_metric.labels(service='opsschool_flask', endpoint='main').inc(1)
    return 'Hey, we have a hello world!'


@app.route('/')
def index():
    """Render ``index.html`` with the result of ``db.read(None)``."""
    data = db.read(None)
    return render_template('index.html', data=data)


@app.route('/add/')
def add():
def test_summary(self):
    """Text exposition of a labelled Summary after a single observation."""
    label_names = ['a', 'b']
    summary = Summary('ss', 'A summary', label_names, registry=self.registry)
    summary.labels('c', 'd').observe(17)

    rendered = generate_latest(self.registry)
    self.assertEqual(
        b'# HELP ss A summary\n'
        b'# TYPE ss summary\n'
        b'ss_count{a="c",b="d"} 1.0\n'
        b'ss_sum{a="c",b="d"} 17.0\n',
        rendered,
    )
# time.sleep(t) if __name__ == '__main__': i = 0 # Start up the server to expose the metrics. # c = Counter('cc', 'Product X counter') # c.inc() # g = Gauge('gg', 'A gauge') # g.set(17) ## GOOD ONE plevel = ['A', 'B', 'C', 'D'] s = Summary('reservation', 'A summary', ['instance', 'run_id' ,'loop', 'packaging_level','quatity', 'size_epcs']) s.labels('VE001', 'Run001', str(i), plevel[i % 4], 10000, '0' ).observe(17) # h = Histogram('hh', 'A histogram') # h.observe(.25) start_http_server(8000) # Generate some requests. time.sleep(5) while True: # h.observe(random.randint(1, 10)) # process_request(random.random()) i += 1 s.labels('VE001', 'Run001', str(i/4), plevel[i % 4], 10000, '0').observe(18) # g.set(55)
class Prometheus(commands.Cog):
    """Collects prometheus metrics"""

    def __init__(self, bot):
        self.bot = bot

        # Process and system level gauges.
        self.ram_gauge = Gauge(
            "miso_memory_usage_bytes",
            "Memory usage of the bot process in bytes.",
        )
        self.cpu_gauge = Gauge(
            "system_cpu_usage_percent",
            "CPU usage of the system in percent.",
            ["core"],
        )

        # Gateway and command level metrics.
        self.event_counter = Counter(
            "miso_gateway_events_total",
            "Total number of gateway events.",
            ["event_type"],
        )
        self.command_histogram = Histogram(
            "miso_command_response_time_seconds",
            "Command end-to-end response time in seconds.",
            ["command"],
            buckets=(0.1, 0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 3.0, 5.0),
        )
        self.shard_latency_summary = Summary(
            "miso_shard_latency_seconds",
            "Latency of a shard in seconds.",
            ["shard"],
        )

        # Cache size gauges.
        self.guild_count = Gauge(
            "miso_cached_guild_count",
            "Total amount of guilds cached.",
        )
        self.member_count = Gauge(
            "miso_cached_member_count",
            "Total amount of members cached.",
        )

    async def cog_load(self):
        """Start the three periodic collection loops."""
        for loop in (
            self.log_system_metrics,
            self.log_shard_latencies,
            self.log_cache_contents,
        ):
            loop.start()

    def cog_unload(self):
        """Cancel the three periodic collection loops."""
        for loop in (
            self.log_system_metrics,
            self.log_shard_latencies,
            self.log_cache_contents,
        ):
            loop.cancel()

    @commands.Cog.listener()
    async def on_socket_event_type(self, event_type):
        """Count one gateway event under its event type."""
        counter = self.event_counter.labels(event_type)
        counter.inc()

    @tasks.loop(seconds=10)
    async def log_shard_latencies(self):
        """Record the current latency of every shard."""
        shards = self.bot.shards.values()
        for shard in shards:
            series = self.shard_latency_summary.labels(shard.id)
            series.observe(shard.latency)

    @tasks.loop(minutes=1)
    async def log_cache_contents(self):
        """Publish how many guilds and users the bot reports."""
        guilds_cached = len(self.bot.guilds)
        users_cached = len(self.bot.users)
        self.guild_count.set(guilds_cached)
        self.member_count.set(users_cached)

    @tasks.loop(seconds=10)
    async def log_system_metrics(self):
        """Publish process memory usage and per-core CPU usage."""
        process = psutil.Process()
        self.ram_gauge.set(process.memory_info().rss)

        per_core = psutil.cpu_percent(interval=None, percpu=True)
        for index, percent in enumerate(per_core):
            self.cpu_gauge.labels(index).set(percent)

    @log_shard_latencies.before_loop
    @log_cache_contents.before_loop
    async def task_waiter(self):
        """Hold the shard and cache loops until the bot is ready."""
        await self.bot.wait_until_ready()

    @commands.Cog.listener()
    async def on_command_completion(self, ctx: commands.Context):
        """Observe the response time of a completed command."""
        # Only record when no subcommand was invoked.
        if ctx.invoked_subcommand is not None:
            return

        elapsed = time() - ctx.timer
        command_name = str(ctx.command)
        self.command_histogram.labels(command_name).observe(elapsed)
import time
import random
from prometheus_client import (
    Counter,
    Enum,
    Gauge,
    Histogram,
    Info,
    Summary,
    start_http_server,
)

# One instance of each metric type.
cc = Counter('cc', 'A counter')
gg = Gauge('gg', 'A gauge')
hh = Histogram(
    'hh',
    'A histogram',
    buckets=(-5, 0, 5),
    labelnames=['a', 'b'],
)
ss = Summary('ss', 'A summary', labelnames=['a', 'b'])
i = Info('my_build_version', 'Description of info')
e = Enum(
    'my_task_state',
    'Description of enum',
    states=['starting', 'running', 'stopped'],
)

# Static build information, published once at import time.
i.info({'version': '1.2.3', 'buildhost': 'foo@bar'})

if __name__ == '__main__':
    # Serve the metrics on port 8000, then update them every two seconds.
    start_http_server(8000)
    while True:
        cc.inc()
        gg.set(random.random())
        histogram_sample = random.randint(-10, 10)
        hh.labels('c', 'd').observe(histogram_sample)
        ss.labels(a='c', b='d').observe(17)
        e.state('running')
        time.sleep(2)
from flask import Flask, render_template
from prometheus_client import Counter, Summary, start_http_server

app = Flask(__name__)

# Visits per service/endpoint pair.
call_metric = Counter(
    'opsschool_monitor_hello_whale_main_count',
    'Number of visits to main',
    labelnames=["service", "endpoint"],
)
# Request processing time per handler.
time_metric = Summary(
    'opsschool_monitor_hello_whale_request_processing_seconds',
    'Time spent processing request',
    labelnames=["method"],
)

hello_whale_timer = time_metric.labels(method="hello_whale")


@app.route('/')
@hello_whale_timer.time()
def hello_whale():
    """Count the visit and render the whale greeting page."""
    visits = call_metric.labels(service='opsschool_hello_whale', endpoint='main')
    visits.inc(1)
    return render_template("whale_hello.html")


if __name__ == '__main__':
    # Metrics are served on port 5001, separately from the Flask app.
    start_http_server(5001)
    app.run(debug=False, host='0.0.0.0')
METRICS_LABEL_NAMES = ['instance', 'badgeReader', 'ftp_server']

# Every metric below is declared on CLIENT_REGISTRY with the same three label
# names, then bound once to this client's label values. The underscore-prefixed
# name is the labelled parent; the public name is the bound child.

_CLIENT_INFO = Info(
    'epas_client',
    'Tipologia di client e protocollo utilizzato',
    labelnames=METRICS_LABEL_NAMES,
    registry=CLIENT_REGISTRY,
)
CLIENT_INFO = _CLIENT_INFO.labels(
    EPAS_REST_USERNAME, BADGE_READER_IP, FTP_SERVER_NAME)

# Metric to track the time spent executing a job.
_JOB_TIME = Summary(
    'epas_client_job_processing_seconds',
    'Time spent executing job',
    labelnames=METRICS_LABEL_NAMES,
    registry=CLIENT_REGISTRY,
)
JOB_TIME = _JOB_TIME.labels(
    EPAS_REST_USERNAME, BADGE_READER_IP, FTP_SERVER_NAME)

_SEND_TIME = Histogram(
    'epas_client_send_stamping_seconds',
    'Tempi di invio delle timbrature',
    labelnames=METRICS_LABEL_NAMES,
    registry=CLIENT_REGISTRY,
)
SEND_TIME = _SEND_TIME.labels(
    EPAS_REST_USERNAME, BADGE_READER_IP, FTP_SERVER_NAME)

_STAMPINGS_SENT = Gauge(
    'epas_client_stampings_sent_total',
    'Timbrature inviate',
    labelnames=METRICS_LABEL_NAMES,
    registry=CLIENT_REGISTRY,
)
STAMPINGS_SENT = _STAMPINGS_SENT.labels(
    EPAS_REST_USERNAME, BADGE_READER_IP, FTP_SERVER_NAME)
# flask_web/app.py
from flask import Flask
from prometheus_client import Counter, Summary, start_http_server

app = Flask(__name__)

# Visits per service/endpoint pair.
call_metric = Counter(
    'opsschool_monitor_flask_main_count',
    'Number of visits to main',
    labelnames=["service", "endpoint"],
)
# Request processing time per handler.
time_metric = Summary(
    'opsschool_monitor_flask_request_processing_seconds',
    'Time spent processing request',
    labelnames=["method"],
)

hello_world_timer = time_metric.labels(method="hello_world")


@app.route('/')
@hello_world_timer.time()
def hello_world():
    """Count a visit to the main endpoint and return the greeting."""
    visits = call_metric.labels(service='opsschool_flask', endpoint='main')
    visits.inc(1)
    return 'Hey, we have Flask in a Docker container!'


goaway_timer = time_metric.labels(method="goaway")


@app.route('/goaway')
@goaway_timer.time()
def goaway():
    """Count a visit to the goaway endpoint and return its message."""
    visits = call_metric.labels(service='opsschool_flask', endpoint='goaway')
    visits.inc(1)
    return 'GO AWAY!'
class ContextImpl(pulsar.Context):
    """Context handed to a function, backed by the instance configuration.

    It exposes function metadata, user configuration and secrets, lets the
    function publish and acknowledge messages, and records user-defined
    metrics both in a Prometheus summary and in an in-memory accumulator.
    """

    # add label to indicate user metric
    user_metrics_label_names = Stats.metrics_label_names + ["metric"]

    def __init__(self, instance_config, logger, pulsar_client, user_code,
                 consumers, secrets_provider, metrics_labels):
        """Store the collaborators and parse the JSON-encoded settings.

        :param instance_config: instance configuration; its
            ``function_details`` supplies names, user config and secrets map
        :param logger: logger returned by :meth:`get_logger`
        :param pulsar_client: client used to create producers in :meth:`publish`
        :param user_code: path of the user code; its directory is used to
            import serde classes
        :param consumers: mapping of topic name to consumer, used by :meth:`ack`
        :param secrets_provider: object resolving secrets in :meth:`get_secret`
        :param metrics_labels: list of label values shared by every user metric
        """
        self.instance_config = instance_config
        self.log = logger
        self.pulsar_client = pulsar_client
        self.user_code_dir = os.path.dirname(user_code)
        self.consumers = consumers
        self.secrets_provider = secrets_provider
        self.accumulated_metrics = {}
        self.publish_producers = {}
        self.publish_serializers = {}
        self.current_message_id = None
        self.current_input_topic_name = None
        self.current_start_time = None

        function_details = instance_config.function_details
        # Fall back to an empty dict (not a list) so that
        # `get_user_config_map` returns a mapping even when no user config
        # was supplied, consistent with `secrets_map` below.
        self.user_config = json.loads(function_details.userConfig) \
            if function_details.userConfig \
            else {}
        self.secrets_map = json.loads(function_details.secretsMap) \
            if function_details.secretsMap \
            else {}

        self.metrics_labels = metrics_labels
        # metric name -> full list of label values for that metric
        self.user_metrics_labels = dict()
        self.user_metrics_summary = Summary(
            "pulsar_function_user_metric",
            'Pulsar Function user defined metric',
            ContextImpl.user_metrics_label_names)

    # Called on a per message basis to set the context for the current message
    def set_current_message_context(self, msgid, topic):
        self.current_message_id = msgid
        self.current_input_topic_name = topic
        self.current_start_time = time.time()

    def get_message_id(self):
        """Return the id of the message currently being processed."""
        return self.current_message_id

    def get_current_message_topic_name(self):
        """Return the input topic of the message currently being processed."""
        return self.current_input_topic_name

    def get_function_name(self):
        return self.instance_config.function_details.name

    def get_function_tenant(self):
        return self.instance_config.function_details.tenant

    def get_function_namespace(self):
        return self.instance_config.function_details.namespace

    def get_function_id(self):
        return self.instance_config.function_id

    def get_instance_id(self):
        return self.instance_config.instance_id

    def get_function_version(self):
        return self.instance_config.function_version

    def get_logger(self):
        return self.log

    def get_user_config_value(self, key):
        """Return the user config value for ``key``, or ``None`` if absent."""
        if key in self.user_config:
            return self.user_config[key]
        return None

    def get_user_config_map(self):
        """Return the whole parsed user config."""
        return self.user_config

    def get_secret(self, secret_key):
        """Resolve ``secret_key`` through the secrets provider.

        Returns ``None`` when the key is not present in the secrets map.
        """
        if secret_key not in self.secrets_map:
            return None
        return self.secrets_provider.provide_secret(
            secret_key, self.secrets_map[secret_key])

    def record_metric(self, metric_name, metric_value):
        """Record one observation of a user-defined metric."""
        # Build the label values for this metric once and reuse them.
        if metric_name not in self.user_metrics_labels:
            self.user_metrics_labels[metric_name] = \
                self.metrics_labels + [metric_name]
        self.user_metrics_summary.labels(
            *self.user_metrics_labels[metric_name]).observe(metric_value)

        if metric_name not in self.accumulated_metrics:
            self.accumulated_metrics[metric_name] = AccumulatedMetricDatum()
        self.accumulated_metrics[metric_name].update(metric_value)

    def get_output_topic(self):
        return self.instance_config.function_details.output

    def get_output_serde_class_name(self):
        return self.instance_config.function_details.outputSerdeClassName

    def publish(self, topic_name, message,
                serde_class_name="serde.IdentitySerDe", properties=None,
                compression_type=None):
        """Serialize ``message`` and send it asynchronously to ``topic_name``.

        Producers are cached per topic and serializers per serde class name.
        ``compression_type`` is only applied when the producer for the topic
        is first created; later calls reuse the cached producer.
        """
        # Just make sure that user supplied values are properly typed
        topic_name = str(topic_name)
        serde_class_name = str(serde_class_name)

        pulsar_compression_type = pulsar._pulsar.CompressionType.NONE
        if compression_type is not None:
            pulsar_compression_type = compression_type

        if topic_name not in self.publish_producers:
            self.publish_producers[topic_name] = self.pulsar_client.create_producer(
                topic_name,
                block_if_queue_full=True,
                batching_enabled=True,
                batching_max_publish_delay_ms=1,
                max_pending_messages=100000,
                compression_type=pulsar_compression_type
            )

        if serde_class_name not in self.publish_serializers:
            serde_klass = util.import_class(self.user_code_dir, serde_class_name)
            self.publish_serializers[serde_class_name] = serde_klass()

        output_bytes = bytes(
            self.publish_serializers[serde_class_name].serialize(message))
        self.publish_producers[topic_name].send_async(
            output_bytes, None, properties=properties)

    def ack(self, msgid, topic):
        """Acknowledge ``msgid`` on the consumer registered for ``topic``.

        :raises ValueError: if no consumer is registered for ``topic``
        """
        if topic not in self.consumers:
            raise ValueError('Invalid topicname %s' % topic)
        self.consumers[topic].acknowledge(msgid)

    def get_and_reset_metrics(self):
        """Return the accumulated metrics, then clear them."""
        metrics = self.get_metrics()
        # TODO(sanjeev):- Make this thread safe
        self.reset_metrics()
        return metrics

    def reset_metrics(self):
        """Zero every user metric series and drop the accumulated data."""
        # TODO: Make it thread safe
        for labels in self.user_metrics_labels.values():
            # Look the child series up once; both private fields belong to it.
            series = self.user_metrics_summary.labels(*labels)
            series._sum.set(0.0)
            series._count.set(0.0)
        self.accumulated_metrics.clear()

    def get_metrics(self):
        """Copy the accumulated metrics into a ``MetricsData`` message."""
        metrics = InstanceCommunication_pb2.MetricsData()
        for metric_name, accumulated_metric in self.accumulated_metrics.items():
            entry = metrics.metrics[metric_name]
            entry.count = accumulated_metric.count
            entry.sum = accumulated_metric.sum
            entry.max = accumulated_metric.max
            entry.min = accumulated_metric.min
        return metrics
from prometheus_client import start_http_server, Gauge, Summary, Counter import time import argparse from pyawair.auth import * from pyawair.conn import * from pyawair.data import * import pyawair import traceback REQUEST_TIME = Summary('awair_equest_processing_seconds', 'Time spent processing request', ['method']) awair_device_api_usage_time = REQUEST_TIME.labels(method="retrieve_api_usage") awair_device_data_time = REQUEST_TIME.labels(method="retrieve_data_usage") RESPONSE_CODE = Counter('awair_reponse_code', 'HTTP Response Codes', ['http_code']) FAILURE_COUNT = Counter('awair_failure_count', 'AWAIR API FAILURES', ['method']) AWAIR_SCORE = Gauge("awair_device_score", "Awair score of device", ['device']) AWAIR_TEMP = Gauge("awair_device_temp", "Awair temp of device", ['device']) AWAIR_HUMID = Gauge("awair_device_humid", "Awair humidity of device", ['device']) AWAIR_CO2 = Gauge("awair_device_co2", "Awair co2 level of device", ['device']) AWAIR_VOC = Gauge("awair_device_voc", "Awair voc of device", ['device']) AWAIR_PM25 = Gauge("awair_device_pm25", "Awair pm25 of device", ['device']) AWAIR_USAGE = Gauge("awair_device_api_usage", "Api usage of device", ['device', 'scope']) def get_data_usage(auth, id, type, base_url, data_url, args=''): """
class BaseExecutor(JAMLCompatible, metaclass=ExecutorType):
    """
    The base class of all Executors, can be used to build encoder, indexer, etc.

    :class:`jina.Executor` as an alias for this class.

    EXAMPLE USAGE

    .. code-block:: python

        from jina import Executor, requests, Flow


        class MyExecutor(Executor):
            @requests
            def foo(self, docs, **kwargs):
                print(docs)  # process docs here


        f = Flow().add(uses=Executor)  # you can add your Executor to a Flow

    Any executor inherited from :class:`BaseExecutor` always has the **meta**
    defined in :mod:`jina.executors.metas.defaults`.

    All arguments in the :func:`__init__` can be specified with a ``with`` map in the YAML config. Example:

    .. highlight:: python
    .. code-block:: python

        class MyAwesomeExecutor(Executor):
            def __init__(awesomeness=5):
                pass

    is equal to

    .. highlight:: yaml
    .. code-block:: yaml

        jtype: MyAwesomeExecutor
        with:
            awesomeness: 5

    """

    def __init__(
        self,
        metas: Optional[Dict] = None,
        requests: Optional[Dict] = None,
        runtime_args: Optional[Dict] = None,
        **kwargs,
    ):
        """`metas` and `requests` are always auto-filled with values from YAML config.

        :param metas: a dict of metas fields
        :param requests: a dict of endpoint-function mapping
        :param runtime_args: a dict of arguments injected from :class:`Runtime` during runtime
        :param kwargs: additional extra keyword arguments to avoid failing when extra params are passed that are not expected
        """
        # Order matters: `_init_monitoring` reads `self.runtime_args`, and the
        # dry-run registration below reads `self.requests`.
        self._add_metas(metas)
        self._add_requests(requests)
        self._add_runtime_args(runtime_args)
        self._init_monitoring()
        self.logger = JinaLogger(self.__class__.__name__)
        # Register the no-op dry-run endpoint unless the Executor already
        # defines one itself, in which case only a warning is logged.
        if __dry_run_endpoint__ not in self.requests:
            self.requests[__dry_run_endpoint__] = self._dry_run_func
        else:
            self.logger.warning(
                f' Endpoint {__dry_run_endpoint__} is defined by the Executor. Be aware that this endpoint is usually reserved to enable health checks from the Client through the gateway.'
                f' So it is recommended not to expose this endpoint. ')

    def _dry_run_func(self, *args, **kwargs):
        """Do nothing; default handler registered for the dry-run endpoint."""
        pass

    def _add_runtime_args(self, _runtime_args: Optional[Dict]):
        """Expose the runtime arguments as attributes of ``self.runtime_args``.

        :param _runtime_args: mapping of runtime arguments; when empty or
            ``None`` an empty namespace is stored instead
        """
        if _runtime_args:
            self.runtime_args = SimpleNamespace(**_runtime_args)
        else:
            self.runtime_args = SimpleNamespace()

    def _init_monitoring(self):
        """Create the request-duration summary when a metrics registry is set.

        When ``runtime_args.metrics_registry`` is missing or falsy, both
        ``_summary_method`` and ``_metrics_buffer`` are set to ``None``.
        """
        if (hasattr(self.runtime_args, 'metrics_registry')
                and self.runtime_args.metrics_registry):
            with ImportExtensions(
                    required=True,
                    help_text=
                    'You need to install the `prometheus_client` to use the montitoring functionality of jina',
            ):
                from prometheus_client import Summary

            self._summary_method = Summary(
                'process_request_seconds',
                'Time spent when calling the executor request method',
                registry=self.runtime_args.metrics_registry,
                namespace='jina',
                labelnames=('executor', 'executor_endpoint', 'runtime_name'),
            )
            # Metrics created later through `monitor` are cached in this
            # buffer next to the built-in summary.
            self._metrics_buffer = {
                'process_request_seconds': self._summary_method
            }
        else:
            self._summary_method = None
            self._metrics_buffer = None

    def _add_requests(self, _requests: Optional[Dict]):
        """Merge an endpoint-to-function-name mapping into ``self.requests``.

        :param _requests: mapping of endpoint to the name of a function
            defined on the class
        :raises TypeError: if the named attribute is neither callable nor a
            ``FunctionMapper``
        """
        if not hasattr(self, 'requests'):
            self.requests = {}

        if _requests:
            # Reverse lookup (function name -> endpoint) of what is already
            # bound, so a re-mapped function loses its previous endpoint.
            func_names = {f.__name__: e for e, f in self.requests.items()}
            for endpoint, func in _requests.items():
                # the following line must be `getattr(self.__class__, func)` NOT `getattr(self, func)`
                # this to ensure we always have `_func` as unbound method
                if func in func_names:
                    del self.requests[func_names[func]]

                _func = getattr(self.__class__, func)
                if callable(_func):
                    # the target function is not decorated with `@requests` yet
                    self.requests[endpoint] = _func
                elif typename(
                        _func) == 'jina.executors.decorators.FunctionMapper':
                    # the target function is already decorated with `@requests`, need unwrap with `.fn`
                    self.requests[endpoint] = _func.fn
                else:
                    raise TypeError(
                        f'expect {typename(self)}.{func} to be a function, but receiving {typename(_func)}'
                    )

    def _add_metas(self, _metas: Optional[Dict]):
        """Build ``self.metas`` from the default metas overridden by ``_metas``.

        String values matching ``env_var_regex`` are deferred and resolved in
        a second pass through ``JAML.expand_dict``.

        :param _metas: mapping of meta fields that override the defaults
        :raises ValueError: if a string value still contains an unresolved
            variable after expansion
        """
        from jina.serve.executors.metas import get_default_metas

        tmp = get_default_metas()

        if _metas:
            tmp.update(_metas)

        unresolved_attr = False
        target = SimpleNamespace()
        # set self values filtered by those non-exist, and non-expandable
        for k, v in tmp.items():
            if k == 'workspace' and not (v is None or v == ''):
                warnings.warn(
                    'Setting `workspace` via `metas.workspace` is deprecated. '
                    'Instead, use `f.add(..., workspace=...)` when defining a a Flow in Python; '
                    'the `workspace` parameter when defining a Flow using YAML; '
                    'or `--workspace` when starting an Executor using the CLI.',
                    category=DeprecationWarning,
                )
            if not hasattr(target, k):
                if isinstance(v, str):
                    if not env_var_regex.findall(v):
                        setattr(target, k, v)
                    else:
                        # Leave the attribute unset for now; it is resolved
                        # in the expansion pass below.
                        unresolved_attr = True
                else:
                    setattr(target, k, v)
            elif type(getattr(target, k)) == type(v):
                setattr(target, k, v)

        if unresolved_attr:
            # `vars(self)` is the live instance dict, so this assignment also
            # makes `self.metas` point at `tmp` until it is replaced at the
            # end of this method.
            _tmp = vars(self)
            _tmp['metas'] = tmp
            new_metas = JAML.expand_dict(_tmp)['metas']

            for k, v in new_metas.items():
                if not hasattr(target, k):
                    if isinstance(v, str):
                        if not (env_var_regex.findall(v)
                                or internal_var_regex.findall(v)):
                            setattr(target, k, v)
                        else:
                            raise ValueError(
                                f'{k}={v} is not substitutable or badly referred'
                            )
                    else:
                        setattr(target, k, v)
        # `name` is important as it serves as an identifier of the executor
        # if not given, then set a name by the rule
        if not getattr(target, 'name', None):
            setattr(target, 'name', self.__class__.__name__)

        self.metas = target

    def close(self) -> None:
        """
        Always invoked as executor is destroyed.

        You can write destructor & saving logic here.
        """
        pass

    def __call__(self, req_endpoint: str, **kwargs):
        """
        Call the function bound to ``req_endpoint``, else the one bound to the
        default endpoint. Returns ``None`` implicitly when neither is bound.

        # noqa: DAR101
        # noqa: DAR102
        # noqa: DAR201
        """
        if req_endpoint in self.requests:
            return self.requests[req_endpoint](
                self, **kwargs)  # unbound method, self is required
        elif __default_endpoint__ in self.requests:
            return self.requests[__default_endpoint__](
                self, **kwargs)  # unbound method, self is required

    async def __acall__(self, req_endpoint: str, **kwargs):
        """
        Async counterpart of :meth:`__call__`, delegating to
        :meth:`__acall_endpoint__`.

        # noqa: DAR101
        # noqa: DAR102
        # noqa: DAR201
        """
        if req_endpoint in self.requests:
            return await self.__acall_endpoint__(req_endpoint, **kwargs)
        elif __default_endpoint__ in self.requests:
            return await self.__acall_endpoint__(__default_endpoint__,
                                                 **kwargs)

    async def __acall_endpoint__(self, req_endpoint, **kwargs):
        """Run the function bound to ``req_endpoint``, timing it if monitored.

        :param req_endpoint: endpoint whose function is looked up in
            ``self.requests``
        :param kwargs: keyword arguments forwarded to the function
        :return: the result of the function, awaited if it is a coroutine
            function
        """
        func = self.requests[req_endpoint]

        runtime_name = (self.runtime_args.name if hasattr(
            self.runtime_args, 'name') else None)

        # Time the call with the request summary when monitoring is enabled,
        # otherwise use a no-op context manager.
        _summary = (self._summary_method.labels(
            self.__class__.__name__, req_endpoint, runtime_name).time()
                    if self._summary_method else contextlib.nullcontext())

        with _summary:
            if iscoroutinefunction(func):
                return await func(self, **kwargs)
            else:
                return func(self, **kwargs)

    @property
    def workspace(self) -> Optional[str]:
        """
        Get the workspace directory of the Executor.

        The directory is created if it does not exist yet. ``None`` is
        returned implicitly when no workspace base is configured.

        :return: returns the workspace of the current shard of this Executor.
        """
        # Precedence: runtime args, then metas, then the environment variable.
        workspace = (getattr(self.runtime_args, 'workspace', None)
                     or getattr(self.metas, 'workspace')
                     or os.environ.get('JINA_DEFAULT_WORKSPACE_BASE'))
        if workspace:
            complete_workspace = os.path.join(workspace, self.metas.name)
            shard_id = getattr(
                self.runtime_args,
                'shard_id',
                None,
            )
            # A missing shard id or -1 adds no shard sub-directory.
            if shard_id is not None and shard_id != -1:
                complete_workspace = os.path.join(complete_workspace,
                                                  str(shard_id))
            if not os.path.exists(complete_workspace):
                os.makedirs(complete_workspace)
            return os.path.abspath(complete_workspace)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Leaving the `with` block always closes the executor.
        self.close()

    @classmethod
    def from_hub(
        cls: Type[T],
        uri: str,
        context: Optional[Dict[str, Any]] = None,
        uses_with: Optional[Dict] = None,
        uses_metas: Optional[Dict] = None,
        uses_requests: Optional[Dict] = None,
        **kwargs,
    ) -> T:
        """Construct an Executor from Hub.

        :param uri: a hub Executor scheme starts with `jinahub://`
        :param context: context replacement variables in a dict, the value of the dict is the replacement.
        :param uses_with: dictionary of parameters to overwrite from the default config's with field
        :param uses_metas: dictionary of parameters to overwrite from the default config's metas field
        :param uses_requests: dictionary of parameters to overwrite from the default config's requests field
        :param kwargs: other kwargs accepted by the CLI ``jina hub pull``
        :return: the Hub Executor object.

        .. highlight:: python
        .. code-block:: python

            from jina import Executor
            from docarray import Document, DocumentArray

            executor = Executor.from_hub(
                uri='jinahub://CLIPImageEncoder', install_requirements=True
            )

        """
        from jina.hubble.helper import is_valid_huburi

        _source = None
        if is_valid_huburi(uri):
            from jina.hubble.hubio import HubIO
            from jina.parsers.hubble import set_hub_pull_parser

            _args = ArgNamespace.kwargs2namespace(
                {
                    'no_usage': True,
                    **kwargs
                },
                set_hub_pull_parser(),
                positional_args=(uri, ),
            )
            _source = HubIO(args=_args).pull()

        # Nothing pulled, or a docker image reference: neither can be loaded
        # as a native Executor.
        if not _source or _source.startswith('docker://'):
            raise ValueError(
                f'Can not construct a native Executor from {uri}. Looks like you want to use it as a '
                f'Docker container, you may want to use it in the Flow via `.add(uses={uri})` instead.'
            )
        return cls.load_config(
            _source,
            context=context,
            uses_with=uses_with,
            uses_metas=uses_metas,
            uses_requests=uses_requests,
        )

    @classmethod
    def serve(
        cls,
        uses_with: Optional[Dict] = None,
        uses_metas: Optional[Dict] = None,
        uses_requests: Optional[Dict] = None,
        stop_event: Optional[Union[threading.Event,
                                   multiprocessing.Event]] = None,
        **kwargs,
    ):
        """Serve this Executor in a temporary Flow. Useful in testing an Executor in remote settings.

        :param uses_with: dictionary of parameters to overwrite from the default config's with field
        :param uses_metas: dictionary of parameters to overwrite from the default config's metas field
        :param uses_requests: dictionary of parameters to overwrite from the default config's requests field
        :param stop_event: a threading event or a multiprocessing event that once set will resume the control Flow
            to main thread.
        :param kwargs: other kwargs accepted by the Flow, full list can be found `here <https://docs.jina.ai/api/jina.orchestrate.flow.base/>`

        """
        from jina import Flow

        f = Flow(**kwargs).add(
            uses=cls,
            uses_with=uses_with,
            uses_metas=uses_metas,
            uses_requests=uses_requests,
        )
        with f:
            f.block(stop_event)

    class StandaloneExecutorType(BetterEnum):
        """
        Type of standalone Executors
        """

        EXTERNAL = 0  # served by a gateway
        SHARED = 1  # not served by a gateway, served by head/worker

    @staticmethod
    def to_kubernetes_yaml(
        uses: str,
        output_base_path: str,
        k8s_namespace: Optional[str] = None,
        executor_type: Optional[
            StandaloneExecutorType] = StandaloneExecutorType.EXTERNAL,
        uses_with: Optional[Dict] = None,
        uses_metas: Optional[Dict] = None,
        uses_requests: Optional[Dict] = None,
        **kwargs,
    ):
        """
        Converts the Executor into a set of yaml deployments to deploy in Kubernetes.

        If you don't want to rebuild image on Jina Hub,
        you can set `JINA_HUB_NO_IMAGE_REBUILD` environment variable.

        :param uses: the Executor to use. Has to be containerized and accessible from K8s
        :param output_base_path: The base path where to dump all the yaml files
        :param k8s_namespace: The name of the k8s namespace to set for the configurations. If None, the name of the Flow will be used.
        :param executor_type: The type of Executor. Can be external or shared. External Executors include the Gateway. Shared Executors don't. Defaults to External
        :param uses_with: dictionary of parameters to overwrite from the default config's with field
        :param uses_metas: dictionary of parameters to overwrite from the default config's metas field
        :param uses_requests: dictionary of parameters to overwrite from the default config's requests field
        :param kwargs: other kwargs accepted by the Flow, full list can be found `here <https://docs.jina.ai/api/jina.orchestrate.flow.base/>`
        """
        from jina import Flow

        # The gateway is included only for the EXTERNAL executor type.
        Flow(**kwargs).add(
            uses=uses,
            uses_with=uses_with,
            uses_metas=uses_metas,
            uses_requests=uses_requests,
        ).to_kubernetes_yaml(
            output_base_path=output_base_path,
            k8s_namespace=k8s_namespace,
            include_gateway=executor_type ==
            BaseExecutor.StandaloneExecutorType.EXTERNAL,
        )

    # Short alias for `to_kubernetes_yaml`.
    to_k8s_yaml = to_kubernetes_yaml

    @staticmethod
    def to_docker_compose_yaml(
        uses: str,
        output_path: Optional[str] = None,
        network_name: Optional[str] = None,
        executor_type: Optional[
            StandaloneExecutorType] = StandaloneExecutorType.EXTERNAL,
        uses_with: Optional[Dict] = None,
        uses_metas: Optional[Dict] = None,
        uses_requests: Optional[Dict] = None,
        **kwargs,
    ):
        """
        Converts the Executor into a yaml file to run with `docker-compose up`

        :param uses: the Executor to use. Has to be containerized
        :param output_path: The output path for the yaml file
        :param network_name: The name of the network that will be used by the deployment name
        :param executor_type: The type of Executor. Can be external or shared. External Executors include the Gateway. Shared Executors don't. Defaults to External
        :param uses_with: dictionary of parameters to overwrite from the default config's with field
        :param uses_metas: dictionary of parameters to overwrite from the default config's metas field
        :param uses_requests: dictionary of parameters to overwrite from the default config's requests field
        :param kwargs: other kwargs accepted by the Flow, full list can be found `here <https://docs.jina.ai/api/jina.orchestrate.flow.base/>`
        """
        from jina import Flow

        f = Flow(**kwargs).add(
            uses=uses,
            uses_with=uses_with,
            uses_metas=uses_metas,
            uses_requests=uses_requests,
        )
        # The gateway is included only for the EXTERNAL executor type.
        f.to_docker_compose_yaml(
            output_path=output_path,
            network_name=network_name,
            include_gateway=executor_type ==
            BaseExecutor.StandaloneExecutorType.EXTERNAL,
        )

    def monitor(self,
                name: Optional[str] = None,
                documentation: Optional[str] = None) -> Optional['Summary']:
        """
        Get a given prometheus metric, if it does not exist yet, it will create it and store it in a buffer.

        NOTE(review): the value returned is the result of calling ``.time()``
        on the buffered metric (or a ``contextlib.nullcontext()``), not the
        metric itself, so the ``Summary`` return annotation looks inaccurate.
        Confirm before relying on it.

        :param name: the name of the metrics
        :param documentation: the description of the metrics

        :return: a timer for the given prometheus metric, or a no-op context manager if monitoring is not enabled.
        """

        if self._metrics_buffer:
            if name not in self._metrics_buffer:
                from prometheus_client import Summary

                # Create the metric once and cache the child already bound to
                # this runtime's name.
                self._metrics_buffer[name] = Summary(
                    name,
                    documentation,
                    registry=self.runtime_args.metrics_registry,
                    namespace='jina',
                    labelnames=('runtime_name', ),
                ).labels(self.runtime_args.name)
            return self._metrics_buffer[name].time()
        else:
            return contextlib.nullcontext()
from prometheus_client import start_http_server, Summary
import random
import time

# Create a metric to track time spent and requests made, partitioned by the
# 'method' label.
REQUEST_TIME = Summary('request_processing_seconds',
                       'Time spent processing request', ['method'])


# Decorate function with metric.
# A Summary declared with label names cannot time or observe anything until it
# is bound to label values, so the timer is taken from a labelled child.
# Calling `REQUEST_TIME.time()` directly fails as soon as the module is loaded.
@REQUEST_TIME.labels('process_request').time()
def process_request(t):
    """A dummy function that takes some time.

    :param t: number of seconds to sleep
    """
    time.sleep(t)


if __name__ == '__main__':
    # Start up the server to expose the metrics.
    start_http_server(8000)
    # Generate some requests.
    while True:
        # Fixed observation on a second series, next to the timed calls.
        REQUEST_TIME.labels("yz").observe(17)
        process_request(random.random())
YES = '1' KIOSK_WALK_IN = '4' COMPLETE = '2' METRIC_REGISTRY = CollectorRegistry() METRIC_REDCAP_REQUEST_SECONDS = Summary( "redcap_request_seconds", "Time spent making requests to REDCap", labelnames = ["function"], registry = METRIC_REGISTRY, ) # Declare this before using it so that it's always an exported metric, even if # never called due to perfect caching. METRIC_FETCH_PARTICIPANT = METRIC_REDCAP_REQUEST_SECONDS.labels("fetch_participant") def metric_redcap_request_seconds(function_name = None): def decorator(function): return METRIC_REDCAP_REQUEST_SECONDS.labels(function_name or function.__name__).time()(function) return decorator CACHE = FanoutCache(os.environ.get("CACHE")) @metric_redcap_request_seconds("fetch_participant (cached)") def fetch_participant(user_info: dict) -> Optional[Dict[str, str]]: """ Exports a REDCap record matching the given *user_info*. Returns None if no match is found.
import prometheus_client
from prometheus_client import Gauge, Summary

import reststore
from reststore import config

proxy_requests = False

# prometheus metrics state
request_summary = Summary(
    'reststore_api_request_duration_seconds',
    'Time spent processing api request',
    labelnames=['resource', 'method'],
)


def request_timer(*x):
    """Return a timer for the request series selected by the label values."""
    series = request_summary.labels(*x)
    return series.time()


file_count_gauge = Gauge(
    'reststore_stored_files',
    'Number of files in reststore',
    labelnames=['store'],
)
# The gauge's sample source is replaced so that `_counts()` is called each
# time samples are requested.
file_count_gauge._samples = lambda: _counts()

file_size_summary = Summary(
    'reststore_file_size_bytes',
    'Size of files stored/fetched in bytes',
    labelnames=['store', 'direction'],
)
# unfortunately do not have a way to query for current
import os import sys import logging from pymongo import MongoClient, DESCENDING, monitoring from pprint import pprint, pformat import json from prometheus_client import Histogram, Summary import constants from drivers import abstractdriver h = Summary('scenario_latency_seconds', 'Latency of various scenarios', unit='seconds', labelnames=['scenario']) delivery = h.labels(scenario='DELIVERY') new_order = h.labels(scenario='NEW_ORDER') order_status = h.labels(scenario='ORDER_STATUS') payment = h.labels(scenario='PAYMENT') stock_level = h.labels(scenario='STOCK_LEVEL') mongo = Histogram('mongo_latency_seconds', 'Latency of various mongo calls', unit='seconds', labelnames=['op']) TABLE_COLUMNS = { constants.TABLENAME_ITEM: [ "I_ID", # INTEGER "I_IM_ID", # INTEGER "I_NAME", # VARCHAR
class ContextImpl(pulsar.Context):
    """Context object exposed to user code of a Pulsar function instance.

    It gives access to the message currently being processed, the function's
    identity and configuration, secrets, user-defined metrics, publishing to
    other topics, acknowledgement, and the state store.
    """

    # add label to indicate user metric
    user_metrics_label_names = Stats.metrics_label_names + ["metric"]

    def __init__(self, instance_config, logger, pulsar_client, user_code,
                 consumers, secrets_provider, metrics_labels, state_context,
                 stats):
        """Store collaborators and parse the function's JSON configuration.

        Args:
            instance_config: Instance configuration; ``function_details``,
                ``function_id``, ``instance_id`` and ``function_version``
                are read from it.
            logger: Logger returned by :meth:`get_logger`.
            pulsar_client: Client used to create producers in
                :meth:`publish`.
            user_code: Path of the user code; its directory is used to
                import serde classes.
            consumers: Mapping of topic name to consumer, used by
                :meth:`ack`.
            secrets_provider: Object whose ``provide_secret`` resolves
                secrets.
            metrics_labels: List of label values shared by all metrics of
                this instance; the metric name is appended per user metric.
            state_context: Backend for the counter/state methods.
            stats: Stats object used to count publish failures.
        """
        self.instance_config = instance_config
        self.log = logger
        self.pulsar_client = pulsar_client
        self.user_code_dir = os.path.dirname(user_code)
        self.consumers = consumers
        self.secrets_provider = secrets_provider
        self.state_context = state_context
        self.publish_producers = {}
        self.publish_serializers = {}
        # No message is in flight until set_current_message_context() runs.
        self.message = None
        self.current_start_time = None

        function_details = instance_config.function_details
        # Both settings are JSON objects; fall back to an empty mapping so
        # get_user_config_map() is map-shaped even without configuration.
        self.user_config = (json.loads(function_details.userConfig)
                            if function_details.userConfig else {})
        self.secrets_map = (json.loads(function_details.secretsMap)
                            if function_details.secretsMap else {})

        self.metrics_labels = metrics_labels
        # metric name -> labelled child of user_metrics_summary
        self.user_metrics_map = dict()
        # NOTE(review): no registry is passed, so this presumably lands on
        # the process-wide default registry -- confirm that only one
        # ContextImpl is created per process.
        self.user_metrics_summary = Summary(
            "pulsar_function_user_metric",
            'Pulsar Function user defined metric',
            ContextImpl.user_metrics_label_names)
        self.stats = stats

    # Called on a per message basis to set the context for the current message
    def set_current_message_context(self, message, topic):
        """Record *message* as current and note the start time.

        *topic* is accepted but not used here.
        """
        self.message = message
        self.current_start_time = time.time()

    # The message accessors below require set_current_message_context() to
    # have been called first; until then self.message is None.

    def get_message_id(self):
        """Return the id of the current message."""
        return self.message.message_id()

    def get_message_key(self):
        """Return the partition key of the current message."""
        return self.message.partition_key()

    def get_message_eventtime(self):
        """Return the event timestamp of the current message."""
        return self.message.event_timestamp()

    def get_message_properties(self):
        """Return the properties of the current message."""
        return self.message.properties()

    def get_current_message_topic_name(self):
        """Return the topic name of the current message."""
        return self.message.topic_name()

    def get_partition_key(self):
        """Return the partition key of the current message."""
        return self.message.partition_key()

    def get_function_name(self):
        """Return the function name from the function details."""
        return self.instance_config.function_details.name

    def get_function_tenant(self):
        """Return the tenant from the function details."""
        return self.instance_config.function_details.tenant

    def get_function_namespace(self):
        """Return the namespace from the function details."""
        return self.instance_config.function_details.namespace

    def get_function_id(self):
        """Return the function id from the instance configuration."""
        return self.instance_config.function_id

    def get_instance_id(self):
        """Return the instance id from the instance configuration."""
        return self.instance_config.instance_id

    def get_function_version(self):
        """Return the function version from the instance configuration."""
        return self.instance_config.function_version

    def get_logger(self):
        """Return the logger passed to the constructor."""
        return self.log

    def get_user_config_value(self, key):
        """Return the user config value for *key*, or None if absent."""
        if key in self.user_config:
            return self.user_config[key]
        return None

    def get_user_config_map(self):
        """Return the whole parsed user configuration."""
        return self.user_config

    def get_secret(self, secret_key):
        """Resolve *secret_key* via the secrets provider.

        Returns None when the key is not listed in the secrets map.
        """
        if secret_key not in self.secrets_map:
            return None
        return self.secrets_provider.provide_secret(
            secret_key, self.secrets_map[secret_key])

    def record_metric(self, metric_name, metric_value):
        """Observe *metric_value* on the user metric named *metric_name*.

        The labelled summary child is created on first use and cached.
        """
        if metric_name not in self.user_metrics_map:
            user_metrics_labels = self.metrics_labels + [metric_name]
            self.user_metrics_map[metric_name] = \
                self.user_metrics_summary.labels(*user_metrics_labels)
        self.user_metrics_map[metric_name].observe(metric_value)

    def get_output_topic(self):
        """Return the output topic from the function details."""
        return self.instance_config.function_details.output

    def get_output_serde_class_name(self):
        """Return the output serde class name from the function details."""
        return self.instance_config.function_details.outputSerdeClassName

    def callback_wrapper(self, callback, topic, message_id, result, msg):
        """Send-completion hook: log and count failures, then call *callback*.

        *callback* (if any) is invoked with ``(result, msg)`` regardless of
        whether the publish succeeded.
        """
        if result != pulsar.Result.Ok:
            error_msg = "Failed to publish to topic [%s] with error [%s] with src message id [%s]" % (
                topic, result, message_id)
            Log.error(error_msg)
            self.stats.incr_total_sys_exceptions(Exception(error_msg))
        if callback:
            callback(result, msg)

    def publish(self, topic_name, message,
                serde_class_name="serde.IdentitySerDe",
                compression_type=None, callback=None, message_conf=None,
                properties=None):
        """Serialize *message* and publish it asynchronously to *topic_name*.

        Producers are cached per topic and serializers per serde class name.

        Args:
            topic_name: Destination topic (coerced to ``str``).
            message: Object handed to the serializer.
            serde_class_name: Serde class to import from the user code
                directory (coerced to ``str``).
            compression_type: Compression for the producer; only takes
                effect when the producer for this topic is first created.
            callback: Optional callable invoked with ``(result, msg)`` once
                the send completes.
            message_conf: Optional dict of extra keyword arguments for
                ``send_async``.
            properties: Optional message properties. Shorthand for
                ``message_conf={"properties": properties}``; an explicit
                ``"properties"`` entry in *message_conf* takes precedence.
        """
        # Just make sure that user supplied values are properly typed
        topic_name = str(topic_name)
        serde_class_name = str(serde_class_name)

        pulsar_compression_type = pulsar._pulsar.CompressionType.NONE
        if compression_type is not None:
            pulsar_compression_type = compression_type

        if topic_name not in self.publish_producers:
            function_details = self.instance_config.function_details
            self.publish_producers[topic_name] = self.pulsar_client.create_producer(
                topic_name,
                block_if_queue_full=True,
                batching_enabled=True,
                batching_max_publish_delay_ms=10,
                compression_type=pulsar_compression_type,
                properties=util.get_properties(
                    util.getFullyQualifiedFunctionName(
                        function_details.tenant,
                        function_details.namespace,
                        function_details.name),
                    self.instance_config.instance_id))

        if serde_class_name not in self.publish_serializers:
            serde_klass = util.import_class(self.user_code_dir, serde_class_name)
            self.publish_serializers[serde_class_name] = serde_klass()

        output_bytes = bytes(
            self.publish_serializers[serde_class_name].serialize(message))

        if properties is not None:
            # Work on a copy so the caller's dict is never mutated.
            message_conf = dict(message_conf or {})
            message_conf.setdefault("properties", properties)

        on_sent = partial(self.callback_wrapper, callback, topic_name,
                          self.get_message_id())
        if message_conf:
            self.publish_producers[topic_name].send_async(
                output_bytes, on_sent, **message_conf)
        else:
            self.publish_producers[topic_name].send_async(
                output_bytes, on_sent)

    def ack(self, msgid, topic):
        """Acknowledge *msgid* on the consumer subscribed to *topic*.

        Raises:
            ValueError: If no consumer matches *topic*, either directly or
                via its base name before a ``-partition-<n>`` suffix.
        """
        topic_consumer = None
        if topic in self.consumers:
            topic_consumer = self.consumers[topic]
        else:
            # if this topic is a partitioned topic
            m = re.search(r'(.+)-partition-(\d+)', topic)
            if not m:
                raise ValueError('Invalid topicname %s' % topic)
            elif m.group(1) in self.consumers:
                topic_consumer = self.consumers[m.group(1)]
            else:
                raise ValueError('Invalid topicname %s' % topic)
        topic_consumer.acknowledge(msgid)

    def get_and_reset_metrics(self):
        """Return the current user metrics, then zero them."""
        metrics = self.get_metrics()
        # TODO(sanjeev):- Make this thread safe
        self.reset_metrics()
        return metrics

    def reset_metrics(self):
        """Zero the sum and count of every recorded user metric."""
        # TODO: Make it thread safe
        for user_metric in self.user_metrics_map.values():
            # Reaches into the summary child's private value holders.
            user_metric._sum.set(0.0)
            user_metric._count.set(0.0)

    def get_metrics(self):
        """Return a dict with ``<prefix><name>_sum`` / ``_count`` entries.

        The prefix is ``Stats.USER_METRIC_PREFIX``; one pair of entries is
        produced per recorded user metric.
        """
        metrics_map = {}
        for metric_name, user_metric in self.user_metrics_map.items():
            metrics_map["%s%s_sum" % (Stats.USER_METRIC_PREFIX, metric_name)] = user_metric._sum.get()
            metrics_map["%s%s_count" % (Stats.USER_METRIC_PREFIX, metric_name)] = user_metric._count.get()
        return metrics_map

    def incr_counter(self, key, amount):
        """Increment counter *key* by *amount* in the state context."""
        return self.state_context.incr(key, amount)

    def get_counter(self, key):
        """Return the amount stored for counter *key*."""
        return self.state_context.get_amount(key)

    def del_counter(self, key):
        """Delete counter *key* from the state context."""
        return self.state_context.delete(key)

    def put_state(self, key, value):
        """Store *value* under *key* in the state context."""
        return self.state_context.put(key, value)

    def get_state(self, key):
        """Return the value stored under *key* in the state context."""
        return self.state_context.get_value(key)
def main():
    """Scrape jobs from every configured cluster, index them, push metrics.

    Steps, in order:

    1. Scrape each cluster, enrich the scraped jobs and collect them.
    2. Index all collected jobs into Elasticsearch unless indexing is
       skipped or nothing was scraped.
    3. Push the run's metrics to every configured Prometheus pushgateway.

    Returns:
        An error message string when a cluster fails and
        ``args.abort_on_error`` is set; otherwise None.
    """
    args = parse_args()
    logger = setup_logging(args.log_level)
    metrics_registry = CollectorRegistry()
    logger.info("Jacko started")

    es_reporter = ElasticsearchReporter(
        es_host=args.es_host,
        es_index=args.es_index,
        es_type=args.es_type,
    )
    scrapers = parse_history_server_scrapers(args=args, es_reporter=es_reporter)
    logger.debug("Initialized %d scrapers", len(scrapers))
    external_enrichers = parse_enrichers(args)

    all_jobs = []
    cluster_labels = ['instance', 'cluster_name']
    jobs_counter = Counter(
        'jobs_scraped', 'Number of jobs scraped',
        labelnames=cluster_labels, registry=metrics_registry)
    scrape_errors_counter = Counter(
        'scrape_errors', 'Number of errors during scrape',
        labelnames=cluster_labels + ['exception_type'],
        registry=metrics_registry)
    scrape_time = Summary(
        'scrape_time', 'Time to scrape all jobs',
        labelnames=cluster_labels, registry=metrics_registry)

    for cluster_name, scraper in scrapers:
        try:
            # Only the scrape itself is timed, not the enrichment.
            with scrape_time.labels(HOST, cluster_name).time():
                jobs = scraper.scrape()
            jobs_counter.labels(HOST, cluster_name).inc(len(jobs))

            enrichers = [ClusterNameEnricher(cluster_name)] + external_enrichers
            for job in jobs:
                for enricher in enrichers:
                    enricher.enrich(job)
            all_jobs.extend(jobs)
        except Exception as error:
            logger.exception("Exception encountered while processing cluster %s", cluster_name)
            scrape_errors_counter.labels(HOST, cluster_name, type(error).__name__).inc()
            if args.abort_on_error:
                return "Exception encountered while processing cluster %s, aborting." % cluster_name

    jobs_count = len(all_jobs)
    logger.info("Scraped %d jobs", jobs_count)

    if jobs_count > 0 and args.skip_indexing:
        logger.info("Skipping indexing to Elasticsearch")
    elif jobs_count > 0:
        logger.info("Indexing to Elasticsearch, host %s index %s type %s",
                    es_reporter.es_host, es_reporter.es_index, es_reporter.es_type)
        es_labels = ['instance', 'elasticsearch_host']
        indexed_jobs_counter = Counter(
            'docs_indexed', 'Number of jobs indexed',
            labelnames=es_labels, registry=metrics_registry)
        index_time = Summary(
            'index_time', 'Time to index all jobs',
            labelnames=es_labels, registry=metrics_registry)
        with index_time.labels(HOST, es_reporter.es_host).time():
            es_reporter.report(all_jobs)
        logger.info("%d jobs indexed in Elasticsearch", jobs_count)
        indexed_jobs_counter.labels(HOST, es_reporter.es_host).inc(jobs_count)

    if args.prometheus_pushgateways:
        runs_counter = Counter('jacko_runs', 'Number of Jacko runs', ['instance'],
                               registry=metrics_registry)
        runs_counter.labels(HOST).inc()
        for gateway in args.prometheus_pushgateways:
            # A failing gateway is logged and must not stop the others.
            try:
                pushadd_to_gateway(gateway=gateway, job='jacko',
                                   registry=metrics_registry, timeout=5)
            except Exception:
                logger.exception('Error pushing metrics to %s' % gateway)

    logger.info("Jacko finished")