def handle_request(self): """ Handles the custom resource request from cloudformation :return: """ start = datetime.now() self._logger.info("Handler {}", self.__class__.__name__) self._logger.info("Cloudformation request is {}", safe_json(self._event, indent=2)) try: result = CustomResource.handle_request(self) return safe_dict({ "result": result, "datetime": datetime.now().isoformat(), "running-time": (datetime.now() - start).total_seconds() }) except Exception as ex: self._logger.error("{} {}", ex, traceback.format_exc()) raise ex finally: self._logger.flush()
def handle_request(self): """ Handles the cloudwatch rule timer event :return: Started tasks, if any, information """ try: start = datetime.now() self._logger.info("Handler {}", self.__class__.__name__) count = 0 tracking_table = TaskTrackingTable(context=self._context) for task in tracking_table.get_tasks_to_check_for_completion(): count += 1 task_id = task[tracking.TASK_TR_ID] last_check_for_completion_time = datetime.now().isoformat() tracking_table.update_action( task_id, status_data={ tracking.TASK_TR_LAST_WAIT_COMPLETION: last_check_for_completion_time }) self._logger.info(INF_SET_COMPLETION_TASK_TIMER, task[tracking.TASK_TR_NAME], task_id, last_check_for_completion_time) running_time = float((datetime.now() - start).total_seconds()) self._logger.info(INF_COMPETION_ITEMS_SET, running_time, count) if count == 0: rule = handlers.disable_completion_cloudwatch_rule( self._context) self._logger.info(INF_DISABLED_COMPLETION_TIMER, rule) return safe_dict({ "datetime": datetime.now().isoformat(), "running-time": running_time, "tasks-to_check": count }) except ValueError as ex: self._logger.error("{}\n{}".format( ex, safe_json(self._event, indent=2))) finally: self._logger.flush()
def handle_request(self):
    """
    Handles the custom resource request from cloudformation
    :return:
    """

    start = datetime.now()

    self._logger.info("Handler {}", self.__class__.__name__)
    self._logger.info("Cloudformation request is {}", safe_json(self._event, indent=2))

    try:
        result = CustomResource.handle_request(self)

        return safe_dict({
            "datetime": datetime.now().isoformat(),
            "running-time": (datetime.now() - start).total_seconds(),
            "result": result
        })
    finally:
        self._logger.flush()
def lambda_handler(event, context):
    dt = datetime.utcnow()
    logstream = LOG_STREAM.format("OpsAutomatorMain", dt.year, dt.month, dt.day)

    with Logger(logstream=logstream, context=context, buffersize=20) as logger:

        logger.info("Ops Automator, version %version%")

        # find the first handler class that can handle this type of event
        for handler_name in handlers.all_handlers():
            if handlers.get_class_for_handler(handler_name).is_handling_request(event):
                handler = handlers.create_handler(handler_name, event, context)
                logger.info("Handler is {}", handler_name)
                try:
                    result = handler.handle_request()
                    logger.info(MSG_REQUEST_HANLED, handler_name,
                                (datetime.utcnow() - dt).total_seconds())
                    return safe_dict(result)
                except Exception as e:
                    logger.error(MSG_ERR_HANDLING_REQUEST, safe_json(event, indent=2),
                                 handler_name, e, traceback.format_exc())
                return

        logger.error(MSG_NO_REQUEST_HANDLER, safe_json(event, indent=2))
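# The dispatcher can also be driven outside of the Lambda environment; the EC2
# state event handler below does exactly this by calling lambda_handler(event, None)
# directly. A hedged local invocation sketch (the event shape is illustrative;
# each handler decides via is_handling_request whether it accepts the event):
if __name__ == "__main__":
    sample_event = {"source": "aws.events", "detail-type": "Scheduled Event"}
    print(lambda_handler(sample_event, None))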
def _handle_test_task_completion(self, action_instance, arguments):
    """
    Checks if a task that is waiting for completion has completed, timed out or
    must keep waiting, and updates the action tracking table accordingly
    :param action_instance: Instance of the action class
    :param arguments: Arguments passed to the completion test
    :return: Result data for the checked task
    """

    self._logger.info(INFO_CHECK_TASK_COMPLETION, self.action, self.task,
                      self.action_id, json.dumps(self.action_parameters, indent=2),
                      self.start_result)

    execution_time = round(float((time() - self.started_at)), 3)
    execution_time_str = str(timedelta(seconds=execution_time))

    result_data = {
        "task": self.task,
        "action": self.action,
        "id": self.action_id,
        "datetime": datetime.now().isoformat(),
        "running-time": execution_time
    }

    # make one more check for completion before testing for timeout
    check_result = action_instance.is_completed(arguments, self.start_result)

    if check_result is not None:
        # the action has completed
        self._action_tracking.update_action(
            action_id=self.action_id,
            status=tracking.STATUS_COMPLETED,
            status_data={
                tracking.TASK_TR_RESULT: str(check_result),
                tracking.TASK_TR_EXECUTION_TIME: str(execution_time)
            })

        self._logger.info(INFO_TASK_COMPLETED, str(check_result), execution_time_str)

        result_data.update({
            "result": tracking.STATUS_COMPLETED,
            "action-result": str(check_result)
        })
    elif execution_time > self.timeout:
        # the action did not complete within its timeout
        self._action_tracking.update_action(
            action_id=self.action_id,
            status=tracking.STATUS_TIMED_OUT,
            status_data={
                tracking.TASK_TR_EXECUTION_TIME: str(execution_time)
            })

        self._logger.error(ERR_TASK_TIMEOUT, execution_time_str)

        result_data.update({"result": tracking.STATUS_TIMED_OUT})
    else:
        # the action is still running, keep waiting
        self._logger.info(INFO_ACTION_NOT_COMPLETED, execution_time_str)
        result_data.update({"result": tracking.STATUS_WAIT_FOR_COMPLETION})

    return safe_dict(result_data)
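# A minimal sketch of the completion contract used above: an action class that
# supports waiting for completion exposes is_completed(arguments, start_result),
# which returns None while the operation is still running and any non-None value
# as the final result. SampleWaitAction is hypothetical and not an action from
# this solution.
class SampleWaitAction(object):

    def execute(self, args):
        # start the long-running operation and return its start result
        return "operation-started"

    def is_completed(self, arguments, start_result):
        # a real action would poll the service here, using start_result to
        # identify the operation; None means "keep waiting"
        finished = True  # stub
        return "operation-done" if finished else None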
def _handle_task_execution(self, action_instance, args):
    """
    Executes the action for a task and updates the tracking table with the result,
    or sets the task to wait for completion if the action has a completion test
    :param action_instance: Instance of the action class
    :param args: Arguments for the action
    :return: Result data for the executed task
    """

    def handle_metrics(result):
        self._logger.info("Sending metrics data is {}",
                          "enabled" if allow_send_metrics() else "disabled")
        if allow_send_metrics():
            try:
                result_data = result if isinstance(result, dict) else json.loads(result)
                if actions.METRICS_DATA in result_data:
                    send_metrics_data(metrics=result_data[actions.METRICS_DATA],
                                      logger=self._logger)
            except Exception as ex:
                self._logger.warning("Error processing or sending metrics data ({})",
                                     str(ex))

    self._logger.info(INFO_ACTION, self.action, self.action_id,
                      "in dry-run mode " if self.dryrun else "", self.task,
                      json.dumps(self.action_parameters, indent=2))
    self._logger.info(INFO_LAMBDA_MEMORY, self._context.function_name,
                      self._context.memory_limit_in_mb)

    self._action_tracking.update_action(self.action_id, status=tracking.STATUS_STARTED)

    start = time()

    return_data = {
        "task": self.task,
        "action": self.action,
        "id": self.action_id,
        "dryrun": self.dryrun,
    }

    action_result = action_instance.execute(args)

    if not action_instance.properties.get(actions.ACTION_INTERNAL, False):
        handle_metrics(action_result)

    execution_time = round(float((time() - start)), 3)

    if self.test_completion_method is None or self.dryrun:
        # no completion test for this action, or running in dry-run mode, so the
        # task is completed after the action has been executed
        self._action_tracking.update_action(
            action_id=self.action_id,
            status=tracking.STATUS_COMPLETED,
            status_data={
                tracking.TASK_TR_STARTED_TS: datetime.now().isoformat(),
                tracking.TASK_TR_RESULT: str(action_result),
                tracking.TASK_TR_EXECUTION_TIME: str(execution_time),
                tracking.TASK_TR_EXECUTION_LOGSTREAM: self.execution_log_stream
            })
        self._logger.info(INFO_ACTION_RESULT, execution_time, str(action_result))
    else:
        # the action has a method for testing completion of the task, set the status
        # to waiting and store the result of the execution that started the action as
        # start result that will be passed to the completion method together with the
        # task arguments
        self._action_tracking.update_action(
            action_id=self.action_id,
            status=tracking.STATUS_WAIT_FOR_COMPLETION,
            status_data={
                tracking.TASK_TR_LAST_WAIT_COMPLETION: datetime.now().isoformat(),
                tracking.TASK_TR_STARTED_TS: int(start),
                tracking.TASK_TR_START_RESULT: str(action_result),
                tracking.TASK_TR_START_EXECUTION_TIME: str(execution_time),
                tracking.TASK_TR_EXECUTION_LOGSTREAM: self.execution_log_stream
            })

        self._logger.info(INFO_STARTED_AND_WAITING_FOR_COMPLETION, str(action_result))

        # enable the cloudwatch rule that triggers the periodic completion checks
        if self._context is not None:
            rule = handlers.enable_completion_cloudwatch_rule(self._context)
            self._logger.info(INFO_RULE_ENABLED, rule)

    # no exception from action
    return_data.update({
        "result": tracking.STATUS_COMPLETED,
        "action-result": str(action_result),
        "datetime": datetime.now().isoformat(),
        "running-time": execution_time
    })

    return safe_dict(return_data)
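# A sketch of the result shape that handle_metrics above looks for: when the result
# of an action (a dict, or a JSON string parsed into one) contains a payload under
# the actions.METRICS_DATA key, that payload is forwarded to send_metrics_data. The
# literal key and payload fields below are illustrative assumptions only.
example_action_result = {
    "output": "action specific result data",
    "metrics-data": {"metric": "value"}  # actual key is the value of actions.METRICS_DATA
}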
def handle_request(self): """ Handled the cloudwatch rule timer event :return: Started tasks, if any, information """ def is_matching_event_state(event_state, ec2event): return event_state in [s.strip() for s in ec2event.split(",") ] or ec2event != "*" try: result = [] start = datetime.now() self._logger.info("Handler {}", self.__class__.__name__) state = self._event.get("detail", {}).get("state") if state is not None: state = state.lower() account = self._event["account"] region = self._event["region"] instance_id = self._event["detail"]["instance-id"] dt = self._event["time"] task = None try: # for all ec2 events tasks in configuration for task in [ t for t in TaskConfiguration(context=self._context, logger=self._logger).get_tasks() if t.get("events") is not None and EC2_STATE_EVENT in t["events"] and t.get("enabled", True) ]: task_name = task["name"] ec2_event = task["events"][EC2_STATE_EVENT] if not is_matching_event_state(state, ec2_event): continue result.append(task_name) self._logger.info(INFO_EVENT, task_name, state, instance_id, account, region, safe_json(task, indent=2)) # create an event for lambda function that scans for resources for this task event = { handlers.HANDLER_EVENT_ACTION: handlers.HANDLER_ACTION_SELECT_RESOURCES, handlers.HANDLER_SELECT_ARGUMENTS: { handlers.HANDLER_EVENT_REGIONS: [region], handlers.HANDLER_EVENT_ACCOUNT: account, "InstanceIds": [instance_id] }, handlers.HANDLER_EVENT_SOURCE: EC2_STATE_EVENT, handlers.HANDLER_EVENT_TASK: task, handlers.HANDLER_EVENT_TASK_DT: dt } if self._context is not None: # start lambda function to scan for task resources payload = str.encode(safe_json(event)) client = get_client_with_retries("lambda", ["invoke"], context=self._context) client.invoke_with_retries( FunctionName=self._context.function_name, Qualifier=self._context.function_version, InvocationType="Event", LogType="None", Payload=payload) else: # or if not running in lambda environment pass event to main task handler lambda_handler(event, None) return safe_dict({ "datetime": datetime.now().isoformat(), "running-time": (datetime.now() - start).total_seconds(), "event-datetime": dt, "started-tasks": result }) except ValueError as ex: self._logger.error("{}\n{}".format(ex, safe_json(task, indent=2))) finally: self._logger.flush()
def handle_request(self): """ Handles the select resources request. Creates new actions for resources found for a task :return: Results of handling the request """ def is_selected_resource(resource, taskname, tags_filter, does_resource_supports_tags): """ Tests if item is a selected resource for this task :param resource: The tested resource :param taskname: Name of the task :param tags_filter: Tag filter :param does_resource_supports_tags: Trie if the resource supports tags :return: True if resource is selected """ # No tags then always selected if not does_resource_supports_tags: self._logger.debug(DEBUG_RESOURCE_NO_TAGS, resource) return True tags = resource.get("Tags", {}) # name of the tag that holds the list of tasks for this resource tagname = self._task_tag if tags_filter is None: # test if name of the task is in list of tasks in tag value if tagname in tags and taskname in [ x.strip() for x in tags[tagname].split(',') ]: self._logger.debug( DEBUG_SELECTED_BY_TASK_NAME_IN_TAG_VALUE, safe_json(resource, indent=2), tagname, taskname) return True else: # using a tag filter, * means any tag if tags_filter == "*": self._logger.debug(DEBUG_SELECTED_WILDCARD_TAG_FILTER, safe_json(resource, indent=2), taskname) return True # test if there are any tags matching the tag filter matched_tags = TagFilterSet( tags_filter).pairs_matching_any_filter(tags) if len(matched_tags) != 0: self._logger.debug(DEBUG_SELECTED_BY_TAG_FILTER, safe_json(resource, indent=2), matched_tags, tag_filter, taskname) return True self._logger.debug(DEBUG_RESOURCE_NOT_SELECTED, safe_json(resource, indent=2), taskname) return False def resource_batches(resources): """ Returns resources as chunks of size items. If the class has an optional custom aggregation function then the reousrces are aggregated first using this function before applying the batch size :param resources: resources to process :return: Generator for blocks of resource items """ aggregate_func = getattr(self.action_class, actions.CUSTOM_AGGREGATE_METHOD, None) batch_size = self.action_properties.get(actions.ACTION_BATCH_SIZE) for i in aggregate_func( resources, self.task_parameters) if aggregate_func is not None else [ resources ]: if batch_size is None: yield i else: first = 0 while first < len(i): yield i[first:first + batch_size] first += batch_size try: items = [] start = datetime.now() self._logger.info("Handler {}", self.__class__.__name__) self._logger.debug(DEBUG_EVENT, safe_json(self._event, indent=2)) self._logger.debug(DEBUG_ACTION, safe_json(self.action_properties, indent=2)) self._logger.info(INFO_SELECTED_RESOURCES, self.resource_name, self.service, self.task[handlers.TASK_NAME]) self._logger.info(INFO_AGGR_LEVEL, self.aggregation_level) task_level_aggregated_resources = [] args = self._build_describe_argument() supports_tags = self.action_properties.get( actions.ACTION_RESOURCES) in services.create_service( self.service).resources_with_tags args["tags"] = supports_tags self._logger.info(INFO_USE_TAGS_TO_SELECT, "R" if supports_tags else "No r") task_name = self.task[handlers.TASK_NAME] # get optional tag filter tag_filter = self.task.get(handlers.TASK_TAG_FILTER) if tag_filter is None: self._logger.debug(DEBUG_SELECT_BY_TASK_NAME, self.resource_name, self._task_tag, task_name) elif tag_filter == "*": self._logger.debug(DEBUG_SELECT_ALL_RESOURCES, self.resource_name) else: self._logger.debug(DEBUG_TAG_FILTER_USED_TO_SELECT_RESOURCES, self.resource_name) with TaskTrackingTable(self._context) as actions_tracking: for service in 
            for service in self._account_service_sessions(self.service):

                assumed_role = service.assumed_role

                self._logger.info(INFO_ACCOUNT, service.aws_account)
                if assumed_role is not None:
                    self._logger.info(INFO_ASSUMED_ROLE, assumed_role)

                for region in self._regions:

                    if region is not None:
                        args["region"] = region
                    else:
                        if "region" in args:
                            del args["region"]

                    self._logger.debug(DEBUG_SELECT_PARAMETERS, self.resource_name,
                                       self.service, args)

                    # selecting a list of all resources in this account/region
                    all_resources = list(service.describe(self.resource_name, **args))

                    logstr = INFO_RESOURCES_FOUND.format(len(all_resources))
                    if region is not None:
                        logstr = INFO_IN_REGION.format(logstr, region)
                    self._logger.info(logstr)

                    # select resources that are processed by the task
                    selected = [sr for sr in all_resources
                                if is_selected_resource(sr, task_name, tag_filter, supports_tags)]

                    if len(all_resources) > 0:
                        self._logger.info(INFO_RESOURCES_SELECTED, len(selected))
                    if len(selected) == 0:
                        continue

                    if not self.keep_tags:
                        for res in selected:
                            if "Tags" in res:
                                del res["Tags"]

                    if self.aggregation_level == actions.ACTION_AGGREGATION_TASK:
                        task_level_aggregated_resources += selected

                    elif self.aggregation_level == actions.ACTION_AGGREGATION_ACCOUNT:
                        if self._check_can_execute(selected):
                            # create task action for account aggregated resources,
                            # optionally split in batch size chunks
                            for r in resource_batches(selected):
                                action_item = actions_tracking.add_task_action(
                                    task=self.task,
                                    assumed_role=assumed_role,
                                    action_resources=r,
                                    task_datetime=self.task_dt,
                                    source=self.source)

                                items.append(action_item)
                                self._logger.info(INFO_ACCOUNT_AGGREGATED,
                                                  action_item[tracking.TASK_TR_ID],
                                                  len(r), self.resource_name,
                                                  self.task[handlers.TASK_NAME])
                    else:
                        for res in selected:
                            # task action for each selected resource
                            action_item = actions_tracking.add_task_action(
                                task=self.task,
                                assumed_role=assumed_role,
                                action_resources=res,
                                task_datetime=self.task_dt,
                                source=self.source)

                            items.append(action_item)
                            self._logger.info(INFO_RESOURCE,
                                              action_item[tracking.TASK_TR_ID],
                                              self.resource_name,
                                              self.task[handlers.TASK_NAME])

            if self.aggregation_level == actions.ACTION_AGGREGATION_TASK and \
                    len(task_level_aggregated_resources) > 0:
                if self._check_can_execute(task_level_aggregated_resources):
                    # create task action for task aggregated resources,
                    # optionally split in batch size chunks
                    for r in resource_batches(task_level_aggregated_resources):
                        action_item = actions_tracking.add_task_action(
                            task=self.task,
                            assumed_role=None,
                            action_resources=r,
                            task_datetime=self.task_dt,
                            source=self.source)

                        items.append(action_item)
                        self._logger.info(INFO_TASK_AGGREGATED,
                                          action_item[tracking.TASK_TR_ID],
                                          len(r), self.resource_name,
                                          self.task[handlers.TASK_NAME])

        self._logger.info(INFO_ADDED_ITEMS, len(items), self.task[handlers.TASK_NAME])

        running_time = float((datetime.now() - start).total_seconds())
        self._logger.info(INFO_RESULT, running_time)

        return safe_dict({
            "datetime": datetime.now().isoformat(),
            "running-time": running_time,
            "dispatched-tasks": items
        })
    finally:
        self._logger.flush()
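# The chunking performed by resource_batches above, reduced to its simplest case:
# no custom aggregation function on the action class and a fixed batch size. The
# names below are illustrative only.
def _batches(resources, batch_size):
    first = 0
    while first < len(resources):
        yield resources[first:first + batch_size]
        first += batch_size

# list(_batches(["i-1", "i-2", "i-3", "i-4", "i-5"], 2))
# -> [["i-1", "i-2"], ["i-3", "i-4"], ["i-5"]]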
def handle_request(self): """ Handles the event triggered by updates to the actions tracking table. :return: results of handling selected updates """ def tasks_items_to_execute(): """ Generator function that selects all record items from the event that need processing. :return: """ def is_new_action(task_record): if task_record["eventName"] == "INSERT": return task_record["dynamodb"]["NewImage"].get( tracking.TASK_TR_STATUS).get("S") return False def is_completed_with_concurrency(task_record): if task_record["eventName"] in ["UPDATE", "MODIFY"]: new_task_item = task_record["dynamodb"]["NewImage"] concurrency_key = new_task_item.get( tracking.TASK_TR_CONCURRENCY_KEY, {}).get("S") status = new_task_item.get(tracking.TASK_TR_STATUS, {}).get("S") return concurrency_key is not None and status in [ tracking.STATUS_COMPLETED, tracking.STATUS_FAILED, tracking.STATUS_TIMED_OUT ] def is_wait_for_completion(task_record): if task_record["eventName"] in ["UPDATE", "MODIFY"]: old_task_item = task_record["dynamodb"]["OldImage"] old_status = old_task_item.get(tracking.TASK_TR_STATUS, {}).get("S") old_wait_ts = old_task_item.get( tracking.TASK_TR_LAST_WAIT_COMPLETION, {}).get("S") new_task_item = task_record["dynamodb"]["NewImage"] new_status = new_task_item.get(tracking.TASK_TR_STATUS, {}).get("S") new_wait_ts = new_task_item.get( tracking.TASK_TR_LAST_WAIT_COMPLETION, {}).get("S") return old_status == tracking.STATUS_WAIT_FOR_COMPLETION and new_status == tracking.STATUS_WAIT_FOR_COMPLETION \ and old_wait_ts is not None and old_wait_ts != new_wait_ts for record in self._event.get("Records"): self._logger.debug("Record to process is {}", safe_json(record, indent=2)) if record.get("eventSource") == "aws:dynamodb": if record["eventName"] == "REMOVE": continue update_to_handle = None self._logger.debug_enabled = record["dynamodb"][ "NewImage"].get(tracking.TASK_TR_DEBUG, {}).get("BOOL", False) if is_new_action(record): update_to_handle = NEW_TASK elif is_completed_with_concurrency(record): update_to_handle = FINISHED_CONCURRENY_TASK elif is_wait_for_completion(record): update_to_handle = CHECK_COMPLETION if update_to_handle is not None: yield update_to_handle, record else: self._logger.debug("No action for this record") try: start = datetime.now() self._logger.info("Handler {}", self.__class__.__name__) for task_tracking_update_type, task_tracking_record in tasks_items_to_execute( ): self.done_work = True new_image = task_tracking_record["dynamodb"]["NewImage"] task_item = { i: new_image[i][new_image[i].keys()[0]] for i in new_image } self._logger.debug_enabled = task_item.get( tracking.TASK_TR_DEBUG, False) if task_tracking_update_type == NEW_TASK: self._handle_new_task_item(task_item) elif task_tracking_update_type == FINISHED_CONCURRENY_TASK: self._handle_completed_concurrency_item(task_item) elif task_tracking_update_type == CHECK_COMPLETION: self._handle_check_completion(task_item) else: if not self.done_work: self._logger.clear() running_time = float((datetime.now() - start).total_seconds()) if self.done_work: self._logger.info(INFO_RESULT, running_time) return safe_dict({ "datetime": datetime.now().isoformat(), "waiting-for-execution": self.waiting_for_execution_tasks, "started-check-for-completion": self.started_completion_checks, "started-execution": self.started_tasks, "started-waiting": self.started_waiting_tasks, "completed-concurrency-tasks": self.finished_concurrency_tasks, "running-time": running_time }) finally: self._logger.flush()
def handle_request(self): """ Handles the cloudwatch rule timer event :return: Started tasks, if any, information """ try: started_tasks = [] start = datetime.now() last_run_dt = self._get_last_run() self._logger.info("Handler {}", self.__class__.__name__) self._logger.info(INFO_LAST_SAVED, str(last_run_dt)) if self.configuration_update: self._logger.info(INFO_CONFIG_RUN, self.updated_task) # test if we already executed in this minute current_dt = self._set_last_run() already_ran_this_minute = last_run_dt == current_dt if already_ran_this_minute and not self.configuration_update: self._logger.info(INFO_TASK_SCHEDULER_ALREADY_RAN) else: self._logger.info(INFO_CURRENT_SCHEDULING_DT, current_dt) task = None enabled_tasks = 0 next_executed_task = None utc = pytz.timezone("UTC") try: for task in [t for t in TaskConfiguration(context=self._context, logger=self._logger).get_tasks() if t.get(handlers.TASK_INTERVAL) is not None and t.get(handlers.TASK_ENABLED, True)]: enabled_tasks += 1 self._logger.debug_enabled = task[handlers.TASK_DEBUG] task_name = task[handlers.TASK_NAME] # timezone for specific task task_timezone = pytz.timezone(task[handlers.TASK_TIMEZONE]) # create cron expression to test if task needs te be executed task_cron_expression = CronExpression(expression=task[handlers.TASK_INTERVAL]) localized_last_run = last_run_dt.astimezone(task_timezone) localized_current_dt = current_dt.astimezone(task_timezone) next_execution = task_cron_expression.first_within_next(timedelta(hours=24), localized_current_dt) next_execution_utc = next_execution.astimezone(utc) if next_execution else None if next_execution_utc is not None: if next_executed_task is None or next_execution_utc < next_executed_task[0]: next_executed_task = (next_execution_utc, task) if already_ran_this_minute: continue # test if task needs te be executed since last run of ops automator execute_dt_since_last = task_cron_expression.last_since(localized_last_run, localized_current_dt) if execute_dt_since_last is None: if next_execution is not None: next_execution = next_execution.astimezone(task_timezone) self._logger.info(INFO_NEXT_EXECUTION, task_name, next_execution.isoformat(), task_timezone) else: self._logger.info(INFO_NO_NEXT_WITHIN, task_name) continue started_tasks.append(task_name) self._logger.debug(INFO_SCHEDULED_TASK, task_name, execute_dt_since_last, task_timezone, str(safe_json(task, indent=2))) # create an event for lambda function that starts execution by selecting for resources for this task self._execute_task(task, execute_dt_since_last) if started_tasks: self._logger.info(INFO_STARTED_TASKS, enabled_tasks, ",".join(started_tasks)) else: self._logger.info(INFO_NO_TASKS_STARTED, enabled_tasks) self._set_next_schedule_event(current_dt, next_executed_task) running_time = float((datetime.now() - start).total_seconds()) self._logger.info(INFO_RESULT, running_time) return safe_dict({ "datetime": datetime.now().isoformat(), "running-time": running_time, "event-datetime": current_dt.isoformat(), "enabled_tasks": enabled_tasks, "started-tasks": started_tasks }) except ValueError as ex: self._logger.error("{}\n{}".format(ex, safe_json(task, indent=2))) finally: self._logger.flush()