def _post_generic_trigger(self, liveaction_db=None, execution_db=None):
    """Dispatch the generic action trigger (st2.generic.actiontrigger) for an execution.

    No-op when the action sensor is disabled via config, or when the execution
    status is not one of the configured ``action_sensor.emit_when`` statuses.

    :param liveaction_db: LiveAction record corresponding to the execution.
    :param execution_db: ActionExecution record being processed.
    """
    if not cfg.CONF.action_sensor.enable:
        LOG.debug('Action trigger is disabled, skipping trigger dispatch...')
        return

    execution_id = str(execution_db.id)
    extra = {'execution': execution_db}

    # Only emit for statuses the operator opted into via "emit_when".
    target_statuses = cfg.CONF.action_sensor.emit_when
    if execution_db.status not in target_statuses:
        msg = 'Skip action execution "%s" because state "%s" is not in %s'
        LOG.debug(msg % (execution_id, execution_db.status, target_statuses), extra=extra)
        return

    with CounterWithTimer(key='notifier.generic_trigger.post'):
        payload = {'execution_id': execution_id,
                   'status': liveaction_db.status,
                   'start_timestamp': str(liveaction_db.start_timestamp),
                   # deprecate 'action_name' at some point and switch to 'action_ref'
                   'action_name': liveaction_db.action,
                   'action_ref': liveaction_db.action,
                   'runner_ref': self._get_runner_ref(liveaction_db.action),
                   # Secrets are masked so they don't leak into trigger consumers.
                   'parameters': liveaction_db.get_masked_parameters(),
                   'result': liveaction_db.result}

        # Use execution_id to extract trace rather than liveaction. execution_id
        # will look-up an exact TraceDB while liveaction depending on context
        # may not end up going to the DB.
        trace_context = self._get_trace_context(execution_id=execution_id)

        LOG.debug('POSTing %s for %s. Payload - %s. TraceContext - %s',
                  ACTION_TRIGGER_TYPE['name'], liveaction_db.id, payload,
                  trace_context)

        with CounterWithTimer(key='notifier.generic_trigger.dispatch'):
            self._trigger_dispatcher.dispatch(self._action_trigger,
                                              payload=payload,
                                              trace_context=trace_context)
def process(self, execution_db):
    """Process one action execution: apply post-run policies and dispatch triggers.

    Post-run policies and notify triggers only fire for completed executions;
    the generic trigger is dispatched unconditionally (it applies its own
    status filtering internally).

    :param execution_db: ActionExecution record to process.
    """
    execution_id = str(execution_db.id)
    extra = {'execution': execution_db}

    LOG.debug('Processing action execution "%s".', execution_id, extra=extra)

    # Get the corresponding liveaction record.
    liveaction_db = LiveAction.get_by_id(execution_db.liveaction['id'])

    if execution_db.status in LIVEACTION_COMPLETED_STATES:
        # If the action execution is executed under an orquesta workflow, policies for the
        # action execution will be applied by the workflow engine. A policy may affect the
        # final state of the action execution thereby impacting the state of the workflow.
        if not workflow_service.is_action_execution_under_workflow_context(execution_db):
            with CounterWithTimer(key='notifier.apply_post_run_policies'):
                policy_service.apply_post_run_policies(liveaction_db)

        if liveaction_db.notify:
            with CounterWithTimer(key='notifier.notify_trigger.post'):
                self._post_notify_triggers(liveaction_db=liveaction_db,
                                           execution_db=execution_db)

    self._post_generic_trigger(liveaction_db=liveaction_db,
                               execution_db=execution_db)
def handle_action_execution_with_instrumentation(ac_ex_db):
    """Instrumented wrapper - only orquesta action executions are counted,
    timed and forwarded to the real handler; everything else is ignored.
    """
    if self._is_orquesta_execution(ac_ex_db=ac_ex_db):
        with CounterWithTimer(key='orquesta.action.executions'):
            return self.handle_action_execution(ac_ex_db=ac_ex_db)
    # Non orquesta workflow executions are skipped entirely.
    return None
def dispatch(self, liveaction_db):
    """Route a liveaction to the runner handler matching its current status.

    :param liveaction_db: LiveAction record to dispatch.
    :return: The result of the (re-fetched) liveaction after the handler ran.
    :raises Exception: If the referenced action does not exist in the DB.
    :raises actionrunner.ActionRunnerDispatchError: For unsupported statuses.
    """
    action_db = get_action_by_ref(liveaction_db.action)
    if not action_db:
        raise Exception("Action %s not found in DB." % (liveaction_db.action))

    liveaction_db.context["pack"] = action_db.pack

    runner_type_db = get_runnertype_by_name(action_db.runner_type["name"])

    LOG.info(
        "Dispatching Action to a runner",
        extra={"liveaction_db": liveaction_db, "runner_type_db": runner_type_db},
    )

    # Get runner instance.
    runner = self._get_runner(runner_type_db, action_db, liveaction_db)
    LOG.debug('Runner instance for RunnerType "%s" is: %s',
              runner_type_db.name, runner)

    # Map each dispatchable status onto its handler method.
    handlers = {
        action_constants.LIVEACTION_STATUS_REQUESTED: self._do_run,
        action_constants.LIVEACTION_STATUS_SCHEDULED: self._do_run,
        action_constants.LIVEACTION_STATUS_RUNNING: self._do_run,
        action_constants.LIVEACTION_STATUS_CANCELING: self._do_cancel,
        action_constants.LIVEACTION_STATUS_PAUSING: self._do_pause,
        action_constants.LIVEACTION_STATUS_RESUMING: self._do_resume,
    }

    handler = handlers.get(liveaction_db.status)
    if handler is None:
        raise actionrunner.ActionRunnerDispatchError(
            "Action runner is unable to dispatch the liveaction because it is "
            'in an unsupported status of "%s".' % liveaction_db.status)

    with CounterWithTimer(key="action.executions"):
        status = liveaction_db.status
        with CounterWithTimer(key="action.executions.process.%s" % (status)):
            liveaction_db = handler(runner)

    return liveaction_db.result
def __call__(self, environ, start_response):
    """WSGI middleware entry point which records per-request metrics.

    Increments total / per-method / per-path request counters for every
    request. For the "stream" service, request duration and the open
    connection gauge are updated via an eventlet posthook (after the
    response is fully sent); for all other services a CounterWithTimer
    context manager wraps the downstream app call directly.
    """
    request = Request(environ)

    metrics_driver = get_driver()

    key = '%s.request.total' % (self._service_name)
    metrics_driver.inc_counter(key)

    key = '%s.request.method.%s' % (self._service_name, request.method)
    metrics_driver.inc_counter(key)

    # Path separators are flattened to underscores so the metric name is valid.
    path = request.path.replace('/', '_')
    key = '%s.request.path.%s' % (self._service_name, path)
    metrics_driver.inc_counter(key)

    if self._service_name == 'stream':
        # For stream service, we also record current number of open connections.
        # Due to the way stream service works, we need to utilize eventlet posthook to
        # correctly set the counter when the connection is closed / full response is returned.
        # See http://eventlet.net/doc/modules/wsgi.html#non-standard-extension-to-support-post-
        # hooks for details

        # Increase request counter
        key = '%s.request' % (self._service_name)
        metrics_driver.inc_counter(key)

        # Increase "total number of connections" gauge
        metrics_driver.inc_gauge('stream.connections', 1)

        start_time = get_datetime_utc_now()

        def update_metrics_hook(env):
            # Hook which is called at the very end after all the response has been sent and
            # connection closed
            time_delta = (get_datetime_utc_now() - start_time)
            duration = time_delta.total_seconds()

            # Send total request time
            # NOTE: "key" is captured from the enclosing scope and is still the
            # '<service>.request' value assigned above.
            metrics_driver.time(key, duration)

            # Decrease "current number of connections" gauge
            metrics_driver.dec_gauge('stream.connections', 1)

        # NOTE: Some tests mock environ and there 'eventlet.posthooks' key is not available
        if 'eventlet.posthooks' in environ:
            environ['eventlet.posthooks'].append(
                (update_metrics_hook, (), {}))

        return self.app(environ, start_response)
    else:
        # Track and time current number of processing requests
        key = '%s.request' % (self._service_name)
        with CounterWithTimer(key=key):
            return self.app(environ, start_response)
def process(self, pre_ack_response):
    """Process a single trigger instance message through the rules engine.

    Records trace information, updates the trigger instance status through
    the PROCESSING -> PROCESSED (or PROCESSING_FAILED) lifecycle, and hands
    the instance to the rules engine. Processing failures are logged and
    swallowed on purpose - nothing upstream can handle them.

    :param pre_ack_response: Raw message payload received before ack.
    :raises ValueError: If no trigger instance can be extracted from the message.
    """
    trigger_instance, message = self._decompose_pre_ack_process_response(
        pre_ack_response)

    if not trigger_instance:
        raise ValueError("No trigger_instance provided for processing.")

    get_driver().inc_counter("trigger.%s.processed" % (trigger_instance.trigger))

    try:
        # Use trace_context from the message and if not found create a new context
        # and use the trigger_instance.id as trace_tag.
        trace_context = message.get(TRACE_CONTEXT, None)
        if not trace_context:
            trace_context = {
                TRACE_ID: "trigger_instance-%s" % str(trigger_instance.id)
            }
        # add a trace or update an existing trace with trigger_instance
        trace_service.add_or_update_given_trace_context(
            trace_context=trace_context,
            trigger_instances=[
                trace_service.get_trace_component_for_trigger_instance(
                    trigger_instance)
            ],
        )

        container_utils.update_trigger_instance_status(
            trigger_instance, trigger_constants.TRIGGER_INSTANCE_PROCESSING)

        with CounterWithTimer(key="rule.processed"):
            with Timer(key="trigger.%s.processed" % (trigger_instance.trigger)):
                self.rules_engine.handle_trigger_instance(trigger_instance)

        container_utils.update_trigger_instance_status(
            trigger_instance, trigger_constants.TRIGGER_INSTANCE_PROCESSED)
    except Exception:
        # NOTE: "except Exception" instead of a bare "except:" so that
        # SystemExit / KeyboardInterrupt (and eventlet's GreenletExit) can
        # still propagate and the service can shut down cleanly.
        # TODO : Capture the reason for failure.
        container_utils.update_trigger_instance_status(
            trigger_instance, trigger_constants.TRIGGER_INSTANCE_PROCESSING_FAILED)

        # This could be a large message but at least in case of an exception
        # we get to see more context.
        # Beyond this point code cannot really handle the exception anyway so
        # eating up the exception.
        LOG.exception("Failed to handle trigger_instance %s.", trigger_instance)
        return
def _do_run(self, runner, runnertype_db, action_db, liveaction_db):
    """Execute an action via the given runner instance and persist the outcome.

    Renders final parameters, runs pre_run/run/post_run, updates the final
    liveaction status, and always cleans up the temporary auth token.

    :param runner: Instantiated runner to execute the action with.
    :param runnertype_db: RunnerType record for the action.
    :param action_db: Action record being executed.
    :param liveaction_db: LiveAction record for this execution.
    :return: The updated liveaction record.
    """
    # Create a temporary auth token which will be available
    # for the duration of the action execution.
    runner.auth_token = self._create_auth_token(
        context=runner.context, action_db=action_db,
        liveaction_db=liveaction_db)

    # Pre-initialize so the "finally" block below can never hit unbound
    # locals, even if a non-Exception BaseException (e.g. GreenletExit)
    # unwinds through the try block.
    status = action_constants.LIVEACTION_STATUS_FAILED
    result = None
    context = None

    try:
        # Finalized parameters are resolved and then rendered. This process could
        # fail. Handle the exception and report the error correctly.
        try:
            runner_params, action_params = param_utils.render_final_params(
                runnertype_db.runner_parameters, action_db.parameters,
                liveaction_db.parameters, liveaction_db.context)
            runner.runner_parameters = runner_params
        except ParamException as e:
            raise actionrunner.ActionRunnerException(str(e))

        LOG.debug('Performing pre-run for runner: %s', runner.runner_id)
        runner.pre_run()

        # Mask secret parameters in the log context
        resolved_action_params = ResolvedActionParameters(
            action_db=action_db, runner_type_db=runnertype_db,
            runner_parameters=runner_params, action_parameters=action_params)
        extra = {'runner': runner, 'parameters': resolved_action_params}
        LOG.debug('Performing run for runner: %s' % (runner.runner_id), extra=extra)

        with CounterWithTimer(
                key=format_metrics_key(action_db=action_db, key='action')):
            (status, result, context) = runner.run(action_params)

        # Best effort - the result may or may not be a JSON string. Catch only
        # the parse failures (TypeError for non-string, ValueError for invalid
        # JSON) instead of a bare "except:".
        try:
            result = json.loads(result)
        except (TypeError, ValueError):
            pass

        action_completed = status in action_constants.LIVEACTION_COMPLETED_STATES

        if (isinstance(runner, PollingAsyncActionRunner)
                and runner.is_polling_enabled() and not action_completed):
            queries.setup_query(liveaction_db.id, runnertype_db, context)
    except Exception:
        # NOTE: "except Exception" rather than bare "except:" so SystemExit /
        # KeyboardInterrupt still propagate and can stop the service.
        LOG.exception('Failed to run action.')
        _, ex, tb = sys.exc_info()
        # mark execution as failed.
        status = action_constants.LIVEACTION_STATUS_FAILED
        # include the error message and traceback to try and provide some hints.
        result = {
            'error': str(ex),
            'traceback': ''.join(traceback.format_tb(tb, 20))
        }
        context = None
    finally:
        # Log action completion
        extra = {'result': result, 'status': status}
        LOG.debug('Action "%s" completed.' % (action_db.name), extra=extra)

        # Update the final status of liveaction and corresponding action execution.
        liveaction_db = self._update_status(liveaction_db.id, status, result,
                                            context)

        # Always clean-up the auth_token
        # This method should be called in the finally block to ensure post_run is not impacted.
        self._clean_up_auth_token(runner=runner, status=status)

    LOG.debug('Performing post_run for runner: %s', runner.runner_id)
    runner.post_run(status=status, result=result)

    LOG.debug('Runner do_run result', extra={'result': liveaction_db.result})
    LOG.audit('Liveaction completed', extra={'liveaction_db': liveaction_db})

    return liveaction_db
def __call__(self, environ, start_response):
    """WSGI middleware entry point which records per-request metrics.

    Skips metrics entirely for endpoints which opt out via the
    "x-submit-metrics" flag and for all "get one" style endpoints (to avoid
    a metric-name explosion). Otherwise increments total / per-method /
    per-path counters, and for the "stream" service records request duration
    and the open connection gauge via an eventlet posthook.
    """
    request = Request(environ)

    try:
        endpoint, _ = self.router.match(request)
    except NotFoundException:
        # Unknown path - fall through with an empty endpoint spec so the
        # default metrics behavior applies.
        endpoint = {}

    # NOTE: We don't track per request and response metrics for /v1/executions/<id> and some
    # other endpoints because this would result in a lot of unique metrics which is an
    # anti-pattern and causes unnecessary load on the metrics server.
    submit_metrics = endpoint.get("x-submit-metrics", True)
    operation_id = endpoint.get("operationId", None)
    is_get_one_endpoint = bool(operation_id) and (
        operation_id.endswith(".get") or operation_id.endswith(".get_one"))

    if is_get_one_endpoint:
        # NOTE: We don't submit metrics for any get one API endpoint since this would result
        # in potentially too many unique metrics
        submit_metrics = False

    if not submit_metrics:
        LOG.debug("Not submitting request metrics for path: %s" % (request.path))
        return self.app(environ, start_response)

    metrics_driver = get_driver()

    key = "%s.request.total" % (self._service_name)
    metrics_driver.inc_counter(key)

    key = "%s.request.method.%s" % (self._service_name, request.method)
    metrics_driver.inc_counter(key)

    # Path separators are flattened to underscores so the metric name is valid.
    path = request.path.replace("/", "_")
    key = "%s.request.path.%s" % (self._service_name, path)
    metrics_driver.inc_counter(key)

    if self._service_name == "stream":
        # For stream service, we also record current number of open connections.
        # Due to the way stream service works, we need to utilize eventlet posthook to
        # correctly set the counter when the connection is closed / full response is returned.
        # See http://eventlet.net/doc/modules/wsgi.html#non-standard-extension-to-support-post-
        # hooks for details

        # Increase request counter
        key = "%s.request" % (self._service_name)
        metrics_driver.inc_counter(key)

        # Increase "total number of connections" gauge
        metrics_driver.inc_gauge("stream.connections", 1)

        start_time = get_datetime_utc_now()

        def update_metrics_hook(env):
            # Hook which is called at the very end after all the response has been sent and
            # connection closed
            time_delta = get_datetime_utc_now() - start_time
            duration = time_delta.total_seconds()

            # Send total request time
            # NOTE: "key" is captured from the enclosing scope and is still the
            # '<service>.request' value assigned above.
            metrics_driver.time(key, duration)

            # Decrease "current number of connections" gauge
            metrics_driver.dec_gauge("stream.connections", 1)

        # NOTE: Some tests mock environ and there 'eventlet.posthooks' key is not available
        if "eventlet.posthooks" in environ:
            environ["eventlet.posthooks"].append(
                (update_metrics_hook, (), {}))

        return self.app(environ, start_response)
    else:
        # Track and time current number of processing requests
        key = "%s.request" % (self._service_name)
        with CounterWithTimer(key=key):
            return self.app(environ, start_response)
def _post_notify_subsection_triggers(self, liveaction_db=None, execution_db=None,
                                     notify_subsection=None,
                                     default_message_suffix=None):
    """Dispatch a notify trigger to every route defined in a notify subsection.

    Renders the message and data through Jinja (best effort - render failures
    are logged and the untransformed values used), builds the notification
    payload, and dispatches one trigger per route.

    :param liveaction_db: LiveAction record for the execution.
    :param execution_db: ActionExecution record for the execution.
    :param notify_subsection: Notify subsection (on-success / on-failure / on-complete).
    :param default_message_suffix: Suffix used when the subsection has no message.
    :raises Exception: If dispatch failed for one or more routes.
    """
    # "routes" is the current field name, "channels" is the deprecated one.
    # A None default is used so a model without a "routes" attribute falls
    # through to "channels" instead of raising AttributeError.
    routes = (getattr(notify_subsection, 'routes', None) or
              getattr(notify_subsection, 'channels', [])) or []

    execution_id = str(execution_db.id)

    if not routes:
        return

    payload = {}
    message = notify_subsection.message or (
        'Action ' + liveaction_db.action + ' ' + default_message_suffix)
    data = notify_subsection.data or {}

    jinja_context = self._build_jinja_context(
        liveaction_db=liveaction_db, execution_db=execution_db)

    try:
        with Timer(key='notifier.transform_message'):
            message = self._transform_message(message=message,
                                              context=jinja_context)
    except Exception:
        # Best effort - fall back to the untransformed message. Catching
        # Exception (not a bare "except:") lets process-control exceptions
        # propagate.
        LOG.exception('Failed (Jinja) transforming `message`.')

    try:
        with Timer(key='notifier.transform_data'):
            data = self._transform_data(data=data, context=jinja_context)
    except Exception:
        LOG.exception('Failed (Jinja) transforming `data`.')

    # At this point convert result to a string. This restricts the rulesengines
    # ability to introspect the result. On the other handle atleast a json usable
    # result is sent as part of the notification. If jinja is required to convert
    # to a string representation it uses str(...) which make it impossible to
    # parse the result as json any longer.
    # TODO: Use to_serializable_dict
    data['result'] = json.dumps(liveaction_db.result)

    payload['message'] = message
    payload['data'] = data
    payload['execution_id'] = execution_id
    payload['status'] = liveaction_db.status
    payload['start_timestamp'] = isotime.format(liveaction_db.start_timestamp)

    try:
        payload['end_timestamp'] = isotime.format(liveaction_db.end_timestamp)
    except AttributeError:
        # This can be raised if liveaction.end_timestamp is None, which is caused
        # when policy cancels a request due to concurrency
        # In this case, use datetime.now() instead
        payload['end_timestamp'] = isotime.format(datetime.utcnow())

    payload['action_ref'] = liveaction_db.action
    payload['runner_ref'] = self._get_runner_ref(liveaction_db.action)

    trace_context = self._get_trace_context(execution_id=execution_id)

    failed_routes = []
    for route in routes:
        try:
            payload['route'] = route
            # Deprecated. Only for backward compatibility reasons.
            payload['channel'] = route

            LOG.debug('POSTing %s for %s. Payload - %s.',
                      NOTIFY_TRIGGER_TYPE['name'], liveaction_db.id, payload)

            with CounterWithTimer(key='notifier.notify_trigger.dispatch'):
                self._trigger_dispatcher.dispatch(self._notify_trigger,
                                                  payload=payload,
                                                  trace_context=trace_context)
        except Exception:
            # Previously swallowed silently - log the failure so operators can
            # see why a route was reported as failed.
            LOG.exception('Failed to dispatch notification for route "%s".', route)
            failed_routes.append(route)

    if failed_routes:
        raise Exception('Failed notifications to routes: %s' %
                        ', '.join(failed_routes))
def _do_run(self, runner):
    """Execute an action using the pre-configured runner and persist the outcome.

    Renders final parameters, runs pre_run/run/post_run, updates the final
    liveaction status, and always cleans up the temporary auth token.

    :param runner: Instantiated runner carrying action, runner_type and
                   liveaction references.
    :return: The updated liveaction record.
    """
    # Create a temporary auth token which will be available
    # for the duration of the action execution.
    runner.auth_token = self._create_auth_token(
        context=runner.context,
        action_db=runner.action,
        liveaction_db=runner.liveaction,
    )

    # Pre-initialize so the "finally" block below can never hit unbound
    # locals, even if a non-Exception BaseException (e.g. GreenletExit)
    # unwinds through the try block.
    status = action_constants.LIVEACTION_STATUS_FAILED
    result = None
    context = None

    try:
        # Finalized parameters are resolved and then rendered. This process could
        # fail. Handle the exception and report the error correctly.
        try:
            runner_params, action_params = param_utils.render_final_params(
                runner.runner_type.runner_parameters,
                runner.action.parameters,
                runner.liveaction.parameters,
                runner.liveaction.context,
            )
            runner.runner_parameters = runner_params
        except ParamException as e:
            raise actionrunner.ActionRunnerException(six.text_type(e))

        LOG.debug("Performing pre-run for runner: %s", runner.runner_id)
        runner.pre_run()

        # Mask secret parameters in the log context
        resolved_action_params = ResolvedActionParameters(
            action_db=runner.action,
            runner_type_db=runner.runner_type,
            runner_parameters=runner_params,
            action_parameters=action_params,
        )
        extra = {"runner": runner, "parameters": resolved_action_params}
        LOG.debug("Performing run for runner: %s" % (runner.runner_id), extra=extra)

        with CounterWithTimer(key="action.executions"):
            with CounterWithTimer(key="action.%s.executions" % (runner.action.ref)):
                (status, result, context) = runner.run(action_params)
                # Best effort - the raw result may or may not be JSON.
                result = jsonify.try_loads(result)

            action_completed = status in action_constants.LIVEACTION_COMPLETED_STATES

            if (isinstance(runner, PollingAsyncActionRunner)
                    and runner.is_polling_enabled() and not action_completed):
                queries.setup_query(runner.liveaction.id, runner.runner_type,
                                    context)
    except Exception:
        # NOTE: "except Exception" rather than bare "except:" so SystemExit /
        # KeyboardInterrupt still propagate and can stop the service.
        LOG.exception("Failed to run action.")
        _, ex, tb = sys.exc_info()
        # mark execution as failed.
        status = action_constants.LIVEACTION_STATUS_FAILED
        # include the error message and traceback to try and provide some hints.
        result = {
            "error": str(ex),
            "traceback": "".join(traceback.format_tb(tb, 20)),
        }
        context = None
    finally:
        # Log action completion
        extra = {"result": result, "status": status}
        LOG.debug('Action "%s" completed.' % (runner.action.name), extra=extra)

        # Update the final status of liveaction and corresponding action execution.
        with Timer(key="action.executions.update_status"):
            runner.liveaction = self._update_status(
                runner.liveaction.id, status, result, context)

        # Always clean-up the auth_token
        # This method should be called in the finally block to ensure post_run is not impacted.
        self._clean_up_auth_token(runner=runner, status=status)

    LOG.debug("Performing post_run for runner: %s", runner.runner_id)
    runner.post_run(status=status, result=result)

    LOG.debug("Runner do_run result", extra={"result": runner.liveaction.result})
    LOG.audit("Liveaction completed", extra={"liveaction_db": runner.liveaction})

    return runner.liveaction
def handle_action_execution_with_instrumentation(ac_ex_db):
    """Count and time the handling of a single action execution."""
    timer = CounterWithTimer(key='orquesta.action.executions')
    with timer:
        handled = self.handle_action_execution(ac_ex_db=ac_ex_db)
    return handled
def handle_workflow_execution_with_instrumentation(wf_ex_db):
    """Count and time the handling of a single workflow execution."""
    timer = CounterWithTimer(key='orquesta.workflow.executions')
    with timer:
        handled = self.handle_workflow_execution(wf_ex_db=wf_ex_db)
    return handled