    def __init__(self, event, context):
        """
        Initializes the instance.
        :param event: event to handle
        :param context: Lambda context
        """
        self._context = context
        self._event = event
        self._table = None

        # Setup logging
        classname = self.__class__.__name__
        dt = datetime.utcnow()
        logstream = LOG_STREAM.format(classname, dt.year, dt.month, dt.day)
        self._logger = QueuedLogger(logstream=logstream,
                                    buffersize=50,
                                    context=context)

        self.configuration_update = ScheduleHandler.is_config_update(
            self._event)
        if self.configuration_update:
            if "OldImage" in self._event["Records"][0]["dynamodb"]:
                self.updated_task = self._event["Records"][0]["dynamodb"][
                    "OldImage"][configuration.CONFIG_TASK_NAME]["S"]
            else:
                self.updated_task = self._event["Records"][0]["dynamodb"][
                    "NewImage"][configuration.CONFIG_TASK_NAME]["S"]

        self.execute_task_request = self.is_execute_event(self._event)
        self.executed_task_name = event.get(
            handlers.HANDLER_EVENT_TASK_NAME,
            "") if self.execute_task_request else None
    def __init__(self, event, context):
        """
        Initializes the handler instance
        :param event: event to handle
        :param context: lambda context
        """
        self._event = event
        self._context = context
        self._logger = None

        self.additional_parameters = {}

        self.commands = {
            "describe-tasks": "get_tasks" if self.parameters.get("name") is None else "get_task",
            "start-task": "start_task"
        }

        self.attribute_transformations = {}
        self.result_transformations = {}

        # Setup logging
        classname = self.__class__.__name__
        dt = datetime.utcnow()
        logstream = LOG_STREAM.format(classname, dt.year, dt.month, dt.day)
        self._logger = QueuedLogger(logstream=logstream,
                                    buffersize=20,
                                    context=self._context)
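
All of these constructors derive the log stream name the same way. A stand-alone sketch of the pattern, assuming a LOG_STREAM template of the shape below (the real constant is defined elsewhere in the source):

from datetime import datetime

LOG_STREAM = "{}-{:0>4d}{:0>2d}{:0>2d}"  # assumed shape: <classname>-<yyyymmdd>

dt = datetime.utcnow()
print(LOG_STREAM.format("CliRequestHandler", dt.year, dt.month, dt.day))
# e.g. CliRequestHandler-20240612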
Example #3
    def __init__(self, event, context):
        """
        Initializes the instance.
        :param event: Handled event
        :param context: Context if running in Lambda
        """
        self._context = context
        self._event = event
        self._tracking_table = None
        self._concurrency_table = None
        self.started_tasks = 0
        self.started_waiting_tasks = 0
        self.waiting_for_execution_tasks = 0
        self.started_completion_checks = 0
        self.finished_concurrency_tasks = 0
        self.done_work = False
        self.invoked_lambda_functions = []

        self.events_client = None
        self._s3_client = None
        self._db_client = None

        # setup logging
        classname = self.__class__.__name__
        dt = datetime.utcnow()
        logstream = LOG_STREAM.format(classname, dt.year, dt.month, dt.day)
        self._logger = QueuedLogger(logstream=logstream,
                                    context=self._context,
                                    buffersize=20,
                                    debug=os.getenv(
                                        ENV_DEBUG_TASK_TACKING_HANDLER,
                                        "false").lower() == "true")
Example #4
    def __init__(self, event, context):
        """
        Initializes the helper setup class
        :param event: event to handle
        :param context: Lambda context
        """
        CustomResource.__init__(self, event, context)

        self.arguments = {a: self.resource_properties[a] for a in self.resource_properties
                          if a not in ["ServiceToken", "Timeout"]}

        self.configuration_bucket = os.getenv(configuration.ENV_CONFIG_BUCKET, None)
        self.automator_role_arn = self.arguments.get("OpsAutomatorLambdaRole")
        self.events_forward_role = self.arguments.get("EventForwardLambdaRole")
        self.ops_automator_topic_arn = self.arguments.get("OpsAutomatorTopicArn")
        self.use_ecs = TaskConfiguration.as_boolean(self.arguments.get("UseEcs", False))
        self.optimize_cross_account_template = TaskConfiguration.as_boolean(
            (self.arguments.get("OptimizeCrossAccountTemplate", False)))

        self.account = os.getenv(handlers.ENV_OPS_AUTOMATOR_ACCOUNT)

        self.stack_version = self.arguments["StackVersion"]

        # setup logging
        dt = datetime.utcnow()
        classname = self.__class__.__name__
        logstream = LOG_STREAM.format(classname, dt.year, dt.month, dt.day)
        self._logger = QueuedLogger(logstream=logstream, context=context, buffersize=50)
    def __init__(self, event, context):
        """
        Initializes the instance.
        :param event: event to handle
        :param context: Lambda context
        """
        self._context = context
        self._event = event
        self._table = None

        # Setup logging
        classname = self.__class__.__name__
        dt = datetime.utcnow()
        logstream = LOG_STREAM.format(classname, dt.year, dt.month, dt.day)
        self._logger = QueuedLogger(logstream=logstream, buffersize=20, context=context)
    @property
    def _logger(self):
        if self.__logger is None:
            # setup logging
            if self.execution_log_stream is None:

                if callable(
                        getattr(self._action_class, "action_logging_subject",
                                None)):
                    # noinspection PyBroadException
                    try:
                        action_subject = self._action_class.action_logging_subject(
                            self._action_arguments, self.action_parameters)
                        self.execution_log_stream = "{}-{}".format(
                            self._event[handlers.TASK_TR_NAME], action_subject)
                    except Exception as ex:
                        # assumes ERR_BUILDING_SUBJECT_FOR_LOG_STREAM takes the action class and the error
                        print(ERR_BUILDING_SUBJECT_FOR_LOG_STREAM.format(
                            str(self._action_class), ex))
                        action_subject = "unknown-"
                        self.execution_log_stream = LOG_STREAM.format(
                            self._event[handlers.TASK_TR_NAME], action_subject,
                            actions.log_stream_datetime(),
                            self._action_arguments.get(handlers.TASK_TR_ID,
                                                       "None"))
            self.__logger = QueuedLogger(logstream=self.execution_log_stream,
                                         buffersize=50 if self.debug else 20,
                                         context=self._context,
                                         debug=self.debug)
        return self.__logger
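
The property above creates the logger lazily on first access and caches it in a name-mangled attribute. The core of that pattern, reduced to a runnable sketch:

import logging

class LazyLoggerExample:
    def __init__(self):
        self.__logger = None  # created on first access, then reused

    @property
    def logger(self):
        if self.__logger is None:
            self.__logger = logging.getLogger(self.__class__.__name__)
        return self.__logger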
Example #7
    def __init__(self, event, context):
        CustomResource.__init__(self, event, context)

        self.arguments = {
            a: self.resource_properties[a]
            for a in self.resource_properties
            if a not in ["ServiceToken", "Timeout"]
        }
        # setup logging
        dt = datetime.utcnow()
        classname = self.__class__.__name__
        logstream = LOG_STREAM.format(classname, dt.year, dt.month, dt.day)
        self._logger = QueuedLogger(logstream=logstream,
                                    context=context,
                                    buffersize=20)
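
The assignment above strips the keys that CloudFormation injects into every custom resource request. The same filtering as a small helper (the helper name is illustrative):

def custom_resource_arguments(resource_properties):
    # drop the CloudFormation-managed keys, keep only user-supplied properties
    return {k: v for k, v in resource_properties.items()
            if k not in ("ServiceToken", "Timeout")}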
Example #8
    def _get_logger(class_name, context, account=None, region=None):
        dt = datetime.utcnow()
        log_stream = LOG_STREAM.format(
            class_name, account + "-" if account is not None else "",
            region + "-" if region is not None else "", dt.year, dt.month,
            dt.day)
        return QueuedLogger(logstream=log_stream,
                            buffersize=20,
                            context=context)
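
With account and region supplied, this variant of LOG_STREAM must carry two extra placeholders. A usage sketch under that assumption:

LOG_STREAM = "{}-{}{}{:0>4d}{:0>2d}{:0>2d}"  # assumed six-placeholder variant
print(LOG_STREAM.format("MyHandler", "111122223333-", "us-east-1-", 2024, 6, 12))
# -> MyHandler-111122223333-us-east-1-20240612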
Example #9
class SetupHelperHandler(CustomResource):

    def __init__(self, event, context):
        """
        Initializes the helper setup class
        :param event: event to handle
        :param context: Lambda context
        """
        CustomResource.__init__(self, event, context)

        self.arguments = {a: self.resource_properties[a] for a in self.resource_properties
                          if a not in ["ServiceToken", "Timeout"]}

        self.configuration_bucket = os.getenv(configuration.ENV_CONFIG_BUCKET, None)
        self.automator_role_arn = self.arguments.get("OpsAutomatorLambdaRole")
        self.events_forward_role = self.arguments.get("EventForwardLambdaRole")
        self.ops_automator_topic_arn = self.arguments.get("OpsAutomatorTopicArn")
        self.use_ecs = TaskConfiguration.as_boolean(self.arguments.get("UseEcs", False))
        self.optimize_cross_account_template = TaskConfiguration.as_boolean(
            (self.arguments.get("OptimizeCrossAccountTemplate", False)))

        self.account = os.getenv(handlers.ENV_OPS_AUTOMATOR_ACCOUNT)

        self.stack_version = self.arguments["StackVersion"]

        # setup logging
        dt = datetime.utcnow()
        classname = self.__class__.__name__
        logstream = LOG_STREAM.format(classname, dt.year, dt.month, dt.day)
        self._logger = QueuedLogger(logstream=logstream, context=context, buffersize=50)

    @classmethod
    def is_handling_request(cls, event, _):
        """
        Test if the event is handled by this handler
        :param _:
        :param event: Event to test
        :return: True if the event is an event from the CloudFormation OpsAutomatorSetupHelper custom resource
        """
        return event.get("StackId") is not None and event.get("ResourceType") == "Custom::OpsAutomatorSetupHelper"

    def handle_request(self):
        """
        Handles the custom resource request from cloudformation
        :return: 
        """

        start = datetime.now()

        self._logger.info("Cloudformation request is {}", safe_json(self._event, indent=2))

        try:
            result = CustomResource.handle_request(self)

            return safe_dict({
                "result": result,
                "datetime": datetime.now().isoformat(),
                "running-time": (datetime.now() - start).total_seconds()
            })
        except Exception as ex:
            self._logger.error(ERR_HANDLING_SETUP_REQUEST, ex, full_stack())
            raise

        finally:
            self._logger.flush()

    def _set_lambda_logs_retention_period(self):
        """
        Aligns retention period for default Lambda log streams with settings
        :return: 
        """

        if self._context is None:
            return

        log_client = get_client_with_retries("logs",
                                             methods=[
                                                 "delete_retention_policy",
                                                 "put_retention_policy",
                                                 "create_log_group",
                                                 "describe_log_groups"
                                             ],
                                             context=self.context)

        retention_days = self.arguments.get("LogRetentionDays")

        base_name = self._context.log_group_name[0:-len("Standard")]

        log_groups = [
            base_name + size for size in [
                "Standard",
                "Medium",
                "Large",
                "XLarge",
                "XXLarge",
                "XXXLarge"
            ]
        ]

        existing_groups = [l["logGroupName"] for l in
                           log_client.describe_log_groups_with_retries(logGroupNamePrefix=base_name).get("logGroups", [])]

        for group in log_groups:
            exists = group in existing_groups
            self._logger.info("Setting retention for log group {}", group)
            if retention_days is None:
                if not exists:
                    continue
                self._logger.info(INF_DELETE_LOG_RETENTION_POLICY, group)
                log_client.delete_retention_policy_with_retries(logGroupName=group)
            else:
                if not exists:
                    log_client.create_log_group(logGroupName=group)
                self._logger.info(INF_SET_LOG_RETENTION_POLICY, group, retention_days)
                log_client.put_retention_policy_with_retries(logGroupName=group, retentionInDays=int(retention_days))

    def _setup(self):
        """
        OpsAutomatorSetupHelper setup actions
        :return: 
        """
        self._set_lambda_logs_retention_period()
        if self.configuration_bucket:
            self.generate_templates()

    def _send_create_metrics(self):

        metrics_data = {
            "Type": "stack",
            "Version": self.stack_version,
            "StackHash": sha256(self.stack_id.encode()).hexdigest(),
            "Data": {
                "Status": "stack_create",
                "Region": self.region
            }
        }

        send_metrics_data(metrics_data=metrics_data, logger=self._logger)

    def _send_delete_metrics(self):

        metrics_data = {
            "Type": "stack",
            "Version": self.stack_version,
            "StackHash": sha256(self.stack_id.encode()).hexdigest(),
            "Data": {
                "Status": "stack_delete",
                "Region": self.region
            }
        }

        send_metrics_data(metrics_data=metrics_data, logger=self._logger)

    def _create_request(self):
        """
        Handles create request from cloudformation custom resource
        :return: 
        """

        try:
            self._setup()
            self.physical_resource_id = self.__class__.__name__.lower()
            if allow_send_metrics():
                self._send_create_metrics()
            return True

        except Exception as ex:
            self.response["Reason"] = str(ex)
            return False

    def _update_request(self):
        """
        Handles update request from cloudformation custom resource
        :return: 
        """

        try:
            self._setup()
            return True

        except Exception as ex:
            self.response["Reason"] = str(ex)
            return False

    def _delete_request(self):
        """
        Handles delete request from cloudformation custom resource
        :return: 
        """

        try:
            self.delete_templates()
            self.delete_external_task_config_stacks()
            if allow_send_metrics():
                self._send_delete_metrics()
            return True

        except Exception as ex:
            self.response["Reason"] = str(ex)
            return False

    def delete_external_task_config_stacks(self):
        """
        Deletes external stacks that were used to create configuration items
        :return: 
        """

        self._logger.info(INF_DELETING_STACKS)

        stacks = TaskConfiguration(context=self.context, logger=self._logger).get_external_task_configuration_stacks()

        if len(stacks) == 0:
            self._logger.info(INF_NO_STACKS)
            return

        self._logger.info(INF_DELETED_STACKS, ", ".join(stacks))

        cfn = boto3.resource("cloudformation")

        for s in stacks:
            self._logger.info(INF_STACK)
            try:
                stack = cfn.Stack(s)
                add_retry_methods_to_resource(stack, ["delete"], context=self.context)
                stack.delete_with_retries()
            except Exception as ex:
                self._logger.error(ERR_DELETING_STACK, s, str(ex))

    def generate_templates(self):
        """
        Generates configuration and cross-account role templates
        :return: 
        """

        def generate_configuration_template(s3, builder, action):
            configuration_template = S3_KEY_ACTION_CONFIGURATION_TEMPLATE.format(action)
            self._logger.info(INF_CREATE_ACTION_TEMPLATE, action, configuration_template)
            template = json.dumps(builder.build_template(action), indent=3)
            s3.put_object_with_retries(Body=template, Bucket=self.configuration_bucket, Key=configuration_template)

        def generate_all_actions_cross_account_role_template_parameterized(s3, builder, all_act, template_description):
            self._logger.info(INF_CREATE_ALL_ACTIONS_CROSS_ROLES_TEMPLATE, S3_KEY_ACCOUNT_CONFIG_WITH_PARAMS)

            template = builder.build_template(action_list=all_act, description=template_description, with_conditional_params=True)
            if self.optimize_cross_account_template:
                template = CrossAccountRoleBuilder.compress_template(template)
            template_json = json.dumps(template, indent=3)
            s3.put_object_with_retries(Body=template_json, Bucket=self.configuration_bucket, Key=S3_KEY_ACCOUNT_CONFIG_WITH_PARAMS)

        # noinspection PyUnusedLocal
        def generate_all_actions_cross_account_role_template(s3, builder, all_act, template_description):
            self._logger.info(INF_CREATE_ALL_ACTIONS_CROSS_ROLES_TEMPLATE, S3_KEY_ACCOUNT_CONFIG_CREATE_ALL)
            template = json.dumps(
                builder.build_template(action_list=all_act, description=template_description, with_conditional_params=False),
                indent=3)
            s3.put_object_with_retries(Body=template, Bucket=self.configuration_bucket, Key=S3_KEY_ACCOUNT_CONFIG_CREATE_ALL)

        def generate_forward_events_template(s3):
            self._logger.info(INF_CREATE_EVENT_FORWARD_TEMPLATE, S3_KEY_ACCOUNT_EVENTS_FORWARD_TEMPLATE)
            template = build_events_forward_template(template_filename="./cloudformation/{}".format(FORWARD_EVENTS_TEMPLATE),
                                                     script_filename="./forward-events.py",
                                                     stack=self.stack_name,
                                                     event_role_arn=self.events_forward_role,
                                                     ops_automator_topic_arn=self.ops_automator_topic_arn,
                                                     version=self.stack_version)

            s3.put_object_with_retries(Body=template, Bucket=self.configuration_bucket, Key=S3_KEY_ACCOUNT_EVENTS_FORWARD_TEMPLATE)

        def generate_scenario_templates(s3):
            self._logger.info("Creating task scenarios templates")

            for template_name, template in list(builders.build_scenario_templates(templates_dir="./cloudformation/scenarios",
                                                                                  stack=self.stack_name)):
                self._logger.info(INF_SCENARIO_TEMPLATE, template_name, S3_KEY_SCENARIO_TEMPLATE_BUCKET)
                s3.put_object_with_retries(Body=template,
                                           Bucket=self.configuration_bucket,
                                           Key=S3_KEY_SCENARIO_TEMPLATE_KEY.format(template_name))

        def generate_custom_resource_builder(s3):
            self._logger.info("Create custom resource builder script {}", S3_KEY_CUSTOM_RESOURCE_BUILDER)

            with open("./build_task_custom_resource.py", "rt") as f:
                script_text = "".join(f.readlines())
                script_text = script_text.replace("%stack%", self.stack_name)
                script_text = script_text.replace("%account%", self.account)
                script_text = script_text.replace("%region%", self.region)
                script_text = script_text.replace("%config_table%", os.getenv("CONFIG_TABLE"))

            s3.put_object_with_retries(Body=script_text, Bucket=self.configuration_bucket, Key=S3_KEY_CUSTOM_RESOURCE_BUILDER)

        def generate_actions_html_page(s3):
            self._logger.info("Generating Actions HTML page {}", S3_KEY_ACTIONS_HTML_PAGE)
            html = builders.generate_html_actions_page(html_file="./builders/actions.html", region=self.region)
            s3.put_object_with_retries(Body=html, Bucket=self.configuration_bucket, Key=S3_KEY_ACTIONS_HTML_PAGE,
                                       ContentType="text/html")

        self._logger.info(INF_GENERATING_TEMPLATES, self.configuration_bucket)
        try:
            stack = os.getenv(handlers.ENV_STACK_NAME, "")
            s3_client = get_client_with_retries("s3", ["put_object"], context=self.context)
            config_template_builder = ActionTemplateBuilder(self.context,
                                                            service_token_arn="arn:aws:region:account:function:used-for-debug-only",
                                                            ops_automator_role=self.automator_role_arn,
                                                            use_ecs=self.use_ecs)
            role_template_builder = CrossAccountRoleBuilder(self.automator_role_arn, stack)

            all_actions = []
            for action_name in actions.all_actions():
                action_properties = actions.get_action_properties(action_name)
                if not action_properties.get(actions.ACTION_INTERNAL, False):
                    generate_configuration_template(s3_client, config_template_builder, action_name)
                    # Enable to generate a template for every individual action
                    # description = TEMPLATE_DESC_CROSS_ACCOUNT_ACTION.format(action_name, stack, account)
                    # generate_action_cross_account_role_template(s3_client, role_template_builder, action_name, description)
                    all_actions.append(action_name)

            if len(all_actions) > 0:
                description = TEMPLATE_DESC_ALL_ACTIONS_PARAMETERS.format(stack, self.account)
                generate_all_actions_cross_account_role_template_parameterized(s3_client, role_template_builder, all_actions,
                                                                               description)
            # enable to generate a template with all actions enabled
            #     description = TEMPLATE_DESC_ALL_ACTIONS.format(stack, account)
            #     generate_all_actions_cross_account_role_template(s3_client, role_template_builder, all_actions, description)

            for action_name in actions.all_actions():
                action_properties = actions.get_action_properties(action_name)
                if action_properties.get(actions.ACTION_EVENTS, None) is not None:
                    generate_forward_events_template(s3_client)
                    break

            generate_actions_html_page(s3_client)

            generate_scenario_templates(s3_client)

            generate_custom_resource_builder(s3_client)

        except Exception as ex:
            self._logger.error(ERR_BUILDING_TEMPLATES, str(ex), full_stack())

    def delete_templates(self):

        s3_client = get_client_with_retries("s3", ["delete_object"], context=self.context)
        s3_key = ""
        try:
            for action_name in actions.all_actions():
                action_properties = actions.get_action_properties(action_name)
                if not action_properties.get(actions.ACTION_INTERNAL, False):
                    self._logger.info(INF_DELETING_ACTION_TEMPLATE, action_name)
                    s3_key = S3_KEY_ACTION_CONFIGURATION_TEMPLATE.format(action_name)
                    s3_client.delete_object_with_retries(Bucket=self.configuration_bucket, Key=s3_key)
        except Exception as ex:
            self._logger.error(ERR_DELETE_CONFIG_ITEM, s3_key, self.configuration_bucket, str(ex))

        self._logger.info(INF_DELETE_ALL_ACTIONS_TEMPLATE)
        for key in [S3_KEY_ACTIONS_HTML_PAGE,
                    S3_KEY_ACCOUNT_CONFIG_WITH_PARAMS,
                    S3_KEY_ACCOUNT_CONFIG_CREATE_ALL,
                    S3_KEY_ACCOUNT_EVENTS_FORWARD_TEMPLATE]:
            try:
                s3_client.delete_object_with_retries(Bucket=self.configuration_bucket, Key=key)
            except Exception as ex:
                self._logger.error(ERR_DELETE_CONFIG_ITEM, key, self.configuration_bucket, str(ex))
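
For reference, the retention alignment in _set_lambda_logs_retention_period reduces to two CloudWatch Logs calls. A plain boto3 sketch without the retry wrappers (the log group name is illustrative):

import boto3

logs = boto3.client("logs")
group = "/aws/lambda/my-stack-OpsAutomatorStandard"  # illustrative log group name

logs.put_retention_policy(logGroupName=group, retentionInDays=30)  # align retention
logs.delete_retention_policy(logGroupName=group)                   # or remove the policy again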
Example #10
class TaskTrackingHandler(object):
    """
    Class to handle events triggered by inserting new items in the actions tracking table.
    """
    def __init__(self, event, context):
        """
        Initializes the instance.
        :param event: Handled event
        :param context: Context if running in Lambda
        """
        self._context = context
        self._event = event
        self._tracking_table = None
        self._concurrency_table = None
        self.started_tasks = 0
        self.started_waiting_tasks = 0
        self.waiting_for_execution_tasks = 0
        self.started_completion_checks = 0
        self.finished_concurrency_tasks = 0
        self.done_work = False
        self.invoked_lambda_functions = []

        self.events_client = None
        self._s3_client = None
        self._db_client = None

        # setup logging
        classname = self.__class__.__name__
        dt = datetime.utcnow()
        logstream = LOG_STREAM.format(classname, dt.year, dt.month, dt.day)
        self._logger = QueuedLogger(logstream=logstream,
                                    context=self._context,
                                    buffersize=20,
                                    debug=os.getenv(
                                        ENV_DEBUG_TASK_TACKING_HANDLER,
                                        "false").lower() == "true")

    @classmethod
    def is_handling_request(cls, event, context):

        # In simulation the handler is called directly when inserting or updating items in the table
        if handlers.running_local(context):
            return False

        if event.get("Records", [{}])[0].get("eventSource",
                                             "") != "aws:dynamodb":
            return False

        source_arn = event["Records"][0]["eventSourceARN"]
        table_name = source_arn.split("/")[1]
        return table_name in [
            os.getenv(handlers.ENV_ACTION_TRACKING_TABLE),
            os.getenv(handlers.ENV_CONCURRENCY_TABLE)
        ]

    @classmethod
    def task_string(cls, action):
        return TASK_ACTION_STRINGS[action] if 0 <= action < len(
            TASK_ACTION_STRINGS) else "Unknown"

    @property
    def tracking_table(self):
        """
        Gets an instance of the tracking table and reuses it in subsequent calls
        :return: Instance of the tracking table
        """
        if self._tracking_table is None:
            self._tracking_table = TaskTrackingTable(self._context,
                                                     self._logger)
        return self._tracking_table

    @property
    def s3_client(self):

        if self._s3_client is None:
            self._s3_client = boto_retry.get_client_with_retries(
                "s3", ["delete_object"], logger=self._logger)
        return self._s3_client

    @property
    def db_client(self):

        if self._db_client is None:
            self._db_client = boto_retry.get_client_with_retries(
                "dynamodb", ["delete_item"], logger=self._logger)
        return self._db_client

    def _get_action_concurrency_key(self, item):
        """
        Gets the concurrency key for a task's action
        :param item: The task item
        :return: The concurrency key for the task's action
        """
        action = item[handlers.TASK_TR_ACTION]
        # get the name of the optional method to return the concurrency key
        action_class = actions.get_action_class(action)
        concurrency_key_method = getattr(
            action_class, handlers.ACTION_CONCURRENCY_KEY_METHOD, None)

        # prepare parameters for calling static function that returns the concurrency key
        if concurrency_key_method is not None:
            get_key_params = {
                actions.ACTION_PARAM_RESOURCES:
                handlers.get_item_resource_data(item, self._context),
                actions.ACTION_PARAM_ACCOUNT:
                item[handlers.TASK_TR_ACCOUNT],
                actions.ACTION_PARAM_STACK:
                os.getenv(handlers.ENV_STACK_NAME),
                actions.ACTION_PARAM_STACK_ID:
                os.getenv(handlers.ENV_STACK_ID),
                actions.ACTION_PARAM_TASK_ID:
                item[handlers.TASK_TR_ID],
                actions.ACTION_PARAM_TASK:
                item[handlers.TASK_TR_NAME]
            }
            get_key_params.update(item.get(handlers.TASK_TR_PARAMETERS))
            return concurrency_key_method(get_key_params)
        else:
            # if this method is not available for action then use the name of the action as the key
            return action

    def _enter_waiting_list(self, concurrency_key):
        """
        Adds 1 to waiting list counter for the specified concurrency key and returns new value
        :param concurrency_key: Concurrency key for counter
        :return: Updated counter
        """

        # update/read counter for the concurrency key
        if not handlers.running_local(self._context):
            resp = self.concurrency_table.update_item_with_retries(
                Key={CONCURRENCY_ID: concurrency_key},
                UpdateExpression="ADD InstanceCount :one SET RunNext=:run",
                ExpressionAttributeValues={
                    ":one": 1,
                    ":run": False
                },
                ReturnValues="UPDATED_NEW")
            return int(resp["Attributes"].get("InstanceCount", 0))
        else:
            resp = self.concurrency_table.get_item_with_retries(
                Key={CONCURRENCY_ID: concurrency_key})
            return resp.get("Item", {}).get(ACTIVE_INSTANCES, 0)

    def _leave_waiting_list(self, task_id, concurrency_key):
        """
        Subtracts 1 from waiting list counter for the specified concurrency key and returns new value. If the counter reaches 0
        then the entry for the concurrency key is removed
        :param task_id: Id of the task item
        :param concurrency_key: Concurrency key for counter
        :return: Updated counter
        """

        # make a consistent read of the task
        self.tracking_table.get_task_item(task_id)

        if not handlers.running_local(self._context):

            resp = self.concurrency_table.update_item_with_retries(
                Key={CONCURRENCY_ID: concurrency_key},
                UpdateExpression="ADD InstanceCount :min_one SET RunNext=:run",
                ExpressionAttributeValues={
                    ":min_one": -1,
                    ":run": True
                },
                ReturnValues="UPDATED_NEW")
            count = max(0, int(resp["Attributes"].get(ACTIVE_INSTANCES, 0)))

            # remove entry if no more waiting items for this key
            if count == 0:
                self.concurrency_table.delete_item_with_retries(
                    Key={CONCURRENCY_ID: concurrency_key})
        else:
            resp = self.concurrency_table.get_item_with_retries(
                Key={CONCURRENCY_ID: concurrency_key})
            count = resp.get("Item", {}).get(ACTIVE_INSTANCES, 0)
            TaskTrackingTable._run_local_stream_event(
                os.getenv(handlers.ENV_CONCURRENCY_TABLE), "UPDATE", {
                    "ConcurrencyId": concurrency_key,
                    "InstanceCount": count
                }, {
                    "ConcurrencyId": concurrency_key,
                    "InstanceCount": count + 1
                }, self._context)

        return count

    @property
    def concurrency_table(self):
        """
        Returns the concurrency table used to track the number of active instances per concurrency key.
        :return: concurrency table
        """
        if self._concurrency_table is None:
            tablename = os.getenv(handlers.ENV_CONCURRENCY_TABLE)

            self._logger.debug("Using concurrency table {}", tablename)

            self._concurrency_table = services.get_session().resource(
                "dynamodb").Table(tablename)
            boto_retry.add_retry_methods_to_resource(
                self._concurrency_table,
                ["update_item", "get_item", "delete_item"],
                context=self._context)

        return self._concurrency_table

    def _is_wait_listed(self, item):
        """
        Tests if there is a max concurrency level for the task's action. If this is the case then a concurrency key is
        retrieved from the action and used to update the counter for that key in the concurrency table. The updated
        counter is tested against the max concurrency level for the task's action.
        :param item: task item
        :return: True if the counter for the task's action concurrency key > max concurrency level, False if it is less
        than or equal, or if the action has no max concurrency level
        """

        action = item.get(handlers.TASK_TR_ACTION, None)
        if action is None:
            return False

        action_properties = actions.get_action_properties(action)

        # test if there are concurrency restrictions
        max_action_concurrency = action_properties.get(
            actions.ACTION_MAX_CONCURRENCY)

        # no maximum
        if max_action_concurrency in [None, 0]:
            return False

        # property may be a lambda function, call the function with parameters of task as lambda parameters
        if isinstance(max_action_concurrency, types.FunctionType):
            parameters = item[handlers.TASK_TR_PARAMETERS]
            max_action_concurrency = max_action_concurrency(parameters)
            if max_action_concurrency in [None, 0]:
                return False

        # get the key for the tasks action
        concurrency_key = self._get_action_concurrency_key(item)
        # enter the waiting list for that key
        count = int(self._enter_waiting_list(concurrency_key))

        # set status to waiting if count > max concurrency level
        status = handlers.STATUS_WAITING if count > int(max_action_concurrency) else None

        # store the concurrency key twice: the concurrency id is used for the index in the GSI and is removed after the
        # action is handled so it no longer shows in the GSI, but we keep another copy in the task tracking table that
        # we need to decrement the counter in the waiting list and possibly start waiting instances with the same key
        self.tracking_table.update_task(item[handlers.TASK_TR_ID],
                                        task=item[handlers.TASK_TR_NAME],
                                        task_metrics=item.get(
                                            handlers.TASK_TR_METRICS, False),
                                        status=status,
                                        status_data={
                                            handlers.TASK_TR_CONCURRENCY_KEY:
                                            concurrency_key,
                                            handlers.TASK_TR_CONCURRENCY_ID:
                                            concurrency_key
                                        })

        if count > max_action_concurrency:
            self._logger.debug(DEBUG_WAITING, item[handlers.TASK_TR_ACTION],
                               concurrency_key, count, max_action_concurrency,
                               item[handlers.TASK_TR_ID])
            return True

        return False

    def _start_task_execution(self,
                              task_item,
                              action=handlers.HANDLER_ACTION_EXECUTE):
        """
        Creates an instance of the lambda function that executes the task's action. It first checks if the action has
        specific memory requirements and based on this it runs a copy of this function or one configured for the
        required memory. All information for executing the action is passed in the event.
        :param task_item: Task item for which action is executed
        :return:
        """

        try:

            self._logger.debug(
                "Entering start_task_execution ({}) with task {}", action,
                safe_json(task_item, indent=3))

            # Create event for execution of the action and set its action so that it is picked up by the execution handler
            event = {i: task_item.get(i) for i in task_item}
            event[handlers.HANDLER_EVENT_ACTION] = action

            self._logger.debug(DEBUG_ACTION,
                               task_item[handlers.TASK_TR_ACTION],
                               task_item[handlers.TASK_TR_NAME],
                               task_item[handlers.TASK_TR_ID])

            self._logger.debug(
                DEBUG_ACTION_PARAMETERS,
                safe_json(task_item.get(handlers.TASK_TR_PARAMETERS, {}),
                          indent=3))

            # get memory allocation for executing the task
            lambda_size = handlers.TASK_TR_COMPLETION_SIZE \
                if action == handlers.HANDLER_ACTION_TEST_COMPLETION \
                else handlers.TASK_TR_EXECUTE_SIZE

            execute_lambda_size = task_item.get(lambda_size,
                                                actions.ACTION_SIZE_STANDARD)

            if execute_lambda_size == actions.ACTION_USE_ECS:
                ecs_memory = task_item.get(
                    handlers.TASK_EXECUTE_ECS_MEMORY
                    if action == handlers.HANDLER_ACTION_EXECUTE else
                    handlers.TASK_COMPLETION_ECS_MEMORY, None)
            else:
                ecs_memory = None

            if not handlers.running_local(self._context):

                self._logger.debug(DEBUG_MEMORY_SIZE, execute_lambda_size)

                if execute_lambda_size != actions.ACTION_USE_ECS:

                    # create event payload
                    payload = str.encode(safe_json(event))

                    # determine which lambda to execute on
                    function_name = "{}-{}-{}".format(
                        os.getenv(handlers.ENV_STACK_NAME),
                        os.getenv(handlers.ENV_LAMBDA_NAME),
                        execute_lambda_size)

                    self._logger.debug(
                        "Running execution of task on lambda function {}",
                        function_name)

                    self._logger.debug(DEBUG_LAMBDA_FUNCTION_, function_name,
                                       payload)
                    # start lambda function
                    lambda_client = boto_retry.get_client_with_retries(
                        "lambda", ["invoke"],
                        context=self._context,
                        logger=self._logger)
                    resp = lambda_client.invoke_with_retries(
                        FunctionName=function_name,
                        InvocationType="Event",
                        LogType="None",
                        Payload=payload)

                    task_info = {
                        "id": task_item[handlers.TASK_TR_ID],
                        "task": task_item[handlers.TASK_TR_NAME],
                        "action": task_item[handlers.TASK_TR_ACTION],
                        "payload": payload,
                        "status-code": resp["StatusCode"]
                    }

                    self._logger.debug(DEBUG_LAMBDA,
                                       safe_json(task_info, indent=2))
                    self.invoked_lambda_functions.append(task_info)
                else:
                    # run as ECS job
                    ecs_args = {
                        "subnets": os.getenv('AWSVPC_SUBNETS'),
                        "securitygroups": os.getenv('AWSVPC_SECURITYGROUPS'),
                        "assignpublicip": os.getenv('AWSVPC_ASSIGNPUBLICIP'),
                        handlers.HANDLER_EVENT_ACTION: action,
                        handlers.TASK_NAME: task_item[handlers.TASK_TR_NAME],
                        handlers.TASK_TR_ID: task_item[handlers.TASK_TR_ID]
                    }

                    self._logger.debug(DEBUG_RUNNING_ECS_TASK, action,
                                       task_item[handlers.TASK_TR_NAME])
                    handlers.run_as_ecs_job(ecs_args,
                                            ecs_memory_size=ecs_memory,
                                            context=self._context,
                                            logger=self._logger)

            else:
                lambda_handler(event, self._context)

            ResultNotifications(context=self._context,
                                logger=self._logger).publish_started(task_item)

        except Exception as ex:
            self._logger.error(ERR_RUNNING_TASK, task_item, str(ex),
                               full_stack())

    def _handle_new_task_item(self, task_item):
        """
        Handles stream updates for new tasks added to the task tracking table
        :param task_item:
        :return:
        """
        self._logger.debug("Handling new task logic")
        # tasks can be wait listed if there is a max concurrency level for its action
        if self._is_wait_listed(task_item):
            self.waiting_for_execution_tasks += 1
            return

        # if not wait listed start the action for the task
        self.started_tasks += 1
        self._start_task_execution(task_item)

    def _handle_completed_concurrency_item(self, task_item):

        self._logger.debug("Handling completed concurrency logic")
        # gets the concurrency key for the task
        concurrency_key = task_item[handlers.TASK_TR_CONCURRENCY_KEY]
        self._logger.debug("Handling completed task with ConcurrencyKey {}",
                           concurrency_key)

        # use the concurrency key to decrement the counter for that key in the waiting list
        count = self._leave_waiting_list(task_item[handlers.TASK_TR_ID],
                                         concurrency_key)
        self._logger.debug("Concurrency count for ConcurrencyKey {} is {}",
                           concurrency_key, count)

        self.finished_concurrency_tasks += 1

        ResultNotifications(context=self._context,
                            logger=self._logger).publish_ended(task_item)

    def _handle_finished_task_without_completion(self, task_item):
        ResultNotifications(context=self._context,
                            logger=self._logger).publish_ended(task_item)

    def _handle_start_waiting_action(self, concurrency_item):

        self._logger.debug("Handling start waiting task logic")
        # gets the concurrency key for the task
        concurrency_id = concurrency_item[handlers.TASK_TR_CONCURRENCY_ID]
        self._logger.debug("Handling completed task with ConcurrencyId {}",
                           concurrency_id)

        waiting_list = self.tracking_table.get_waiting_tasks(concurrency_id)

        self._logger.debug(
            " List of waiting tasks for ConcurrencyKey {} is {}",
            concurrency_id, safe_json(waiting_list, indent=3))
        if len(waiting_list) > 0:
            count = concurrency_item.get(ACTIVE_INSTANCES, 0)
            oldest_waiting_task = sorted(
                waiting_list, key=lambda w: w[handlers.TASK_TR_CREATED_TS])[0]
            self._logger.debug(DEBUG_START_WAITING, concurrency_id, count,
                               oldest_waiting_task[handlers.TASK_TR_ACTION],
                               oldest_waiting_task[handlers.TASK_TR_NAME],
                               oldest_waiting_task[handlers.TASK_TR_ID])
            self.started_waiting_tasks += 1
            self._start_task_execution(oldest_waiting_task)

    def _handle_check_completion(self, task_item):
        self._logger.debug("Handling test for completion logic")
        if task_item.get(handlers.TASK_TR_RUN_LOCAL,
                         False) and not handlers.running_local(self._context):
            self._logger.debug("Item running in local mode skipped")
            return

        self.started_completion_checks += 1
        self._start_task_execution(
            task_item=task_item,
            action=handlers.HANDLER_ACTION_TEST_COMPLETION)

    def _handle_deleted_item(self, task_item):

        if task_item.get(handlers.TASK_TR_S3_RESOURCES, False):

            bucket = os.getenv(handlers.ENV_RESOURCE_BUCKET)
            key = task_item[handlers.TASK_TR_ID] + ".json"
            try:
                self._logger.debug(DEBUG_DELETING_RESOURCES_FROM_S3, bucket,
                                   key)
                self.s3_client.delete_object_with_retries(Bucket=bucket,
                                                          Key=key)
            except Exception as ex:
                self._logger.warning(WARN_DELETING_RESOURCES, bucket, key, ex)

    def handle_request(self):
        """
        Handles the event triggered by updates to the actions tracking table.
        :return: results of handling selected updates
        """
        def tasks_items_to_execute():
            """
            Generator function that selects all record items from the event that need processing.
            :return:
            """
            def table_name(rec):
                source_arn = rec["eventSourceARN"]
                return source_arn.split("/")[1]

            def from_tracking_table(rec):
                return table_name(rec) == os.getenv(
                    handlers.ENV_ACTION_TRACKING_TABLE)

            def from_concurrency_table(rec):
                return table_name(rec) == os.getenv(
                    handlers.ENV_CONCURRENCY_TABLE)

            def get_old_image(task_record):
                return task_record["dynamodb"].get("OldImage", {})

            def get_new_image(task_record):
                return task_record["dynamodb"].get("NewImage", {})

            def get_new_status(task_record):
                return get_new_image(task_record).get(handlers.TASK_TR_STATUS,
                                                      {}).get("S")

            def get_old_status(task_record):
                return get_old_image(task_record).get(handlers.TASK_TR_STATUS,
                                                      {}).get("S")

            def is_task_tracking_table_update(task_record):
                if not from_tracking_table(task_record):
                    return False
                return task_record["eventName"] in ["UPDATE", "MODIFY"]

            def is_task_done(task_record):

                if not is_task_tracking_table_update(task_record):
                    return False

                new_status = get_new_status(task_record)
                old_status = get_old_status(task_record)

                # only a transition into a non-active status counts as done
                if old_status == new_status:
                    return False
                return new_status in handlers.task_tracking_table.NOT_LONGER_ACTIVE_STATUSES

            def is_task_with_concurrency(task_record):
                return get_new_image(task_record).get(
                    handlers.TASK_TR_CONCURRENCY_KEY, {}).get("S") is not None

            def get_old_last_update(task_record):
                return get_old_image(task_record).get(
                    handlers.TASK_TR_LAST_WAIT_COMPLETION, {}).get("S")

            def get_new_last_update(task_record):
                return get_new_image(task_record).get(
                    handlers.TASK_TR_LAST_WAIT_COMPLETION, {}).get("S")

            def is_delete_task(task_record):
                return from_tracking_table(task_record) and task_record["eventName"] == "REMOVE"

            def is_new_task(task_record):
                if from_tracking_table(task_record) and task_record["eventName"] == "INSERT":
                    return get_new_status(task_record) == handlers.STATUS_PENDING
                return False

            def is_completed_with_concurrency(task_record):
                return is_task_done(task_record) and is_task_with_concurrency(
                    task_record)

            def is_completed_without_concurrency(task_record):

                return is_task_done(
                    task_record) and not is_task_with_concurrency(task_record)

            def is_wait_for_completion(task_record):

                if not is_task_tracking_table_update(task_record):
                    return False

                if get_old_status(task_record) != handlers.STATUS_WAIT_FOR_COMPLETION or \
                        get_new_status(task_record) != handlers.STATUS_WAIT_FOR_COMPLETION:
                    return False

                return get_old_last_update(task_record) != get_new_last_update(
                    task_record)

            def is_concurrency_task_completed(concurrency_record):
                if not from_concurrency_table(concurrency_record):
                    return False

                if concurrency_record["eventName"] == "REMOVE":
                    return False

                return concurrency_record["dynamodb"].get("NewImage", {}).get(
                    "RunNext", {}).get("BOOL", False)

            def get_action_type(rec):

                if is_new_task(rec):
                    return NEW_TASK

                if is_completed_without_concurrency(rec):
                    return FINISHED_TASK

                if is_completed_with_concurrency(rec):
                    return FINISHED_CONCURRENCY_TASK

                if is_wait_for_completion(rec):
                    return CHECK_COMPLETION

                if is_delete_task(rec):
                    return DELETE_ITEM

                if is_concurrency_task_completed(rec):
                    return START_WAITING_ACTION

                return None

            for r in self._event.get("Records"):

                self._logger.debug("Record to process is {}",
                                   safe_json(r, indent=2))

                if r.get("eventSource") == "aws:dynamodb":

                    image_used = "NewImage" if "NewImage" in r[
                        "dynamodb"] else "OldImage"

                    if r["dynamodb"].get("NewImage", {}).get(handlers.TASK_TR_ACTION) is None and \
                            r["dynamodb"].get("OldImage", {}).get(handlers.TASK_TR_ACTION) is not None:
                        continue

                    self._logger.debug_enabled = r["dynamodb"][image_used].get(
                        handlers.TASK_TR_DEBUG, {}).get("BOOL", False)

                    update_to_handle = get_action_type(r)

                    if update_to_handle is not None:
                        yield update_to_handle, r
                    else:
                        self._logger.debug("No action for record")

        try:

            start = datetime.now()

            task_handlers = [
                self._handle_new_task_item,
                self._handle_finished_task_without_completion,
                self._handle_completed_concurrency_item,
                self._handle_check_completion, self._handle_deleted_item,
                self._handle_start_waiting_action
            ]

            for task_tracking_update_type, record in tasks_items_to_execute():
                self.done_work = True

                used_image = "OldImage" if record[
                    "eventName"] == "REMOVE" else "NewImage"
                image = record["dynamodb"][used_image]
                handled_item = unpack_record(image)
                self._logger.debug_enabled = handled_item.get(
                    handlers.TASK_TR_DEBUG, False)

                self._logger.debug(
                    "Executing handler function {} for type {} ({})",
                    task_handlers[task_tracking_update_type].__name__,
                    self.task_string(task_tracking_update_type),
                    task_tracking_update_type)
                task_handlers[task_tracking_update_type](handled_item)

            if not self.done_work:
                self._logger.clear()

            running_time = float((datetime.now() - start).total_seconds())
            if self.done_work:
                self._logger.debug(DEBUG_RESULT, running_time)

            return safe_dict({
                "datetime": datetime.now().isoformat(),
                "waiting-for-execution": self.waiting_for_execution_tasks,
                "started-check-for-completion": self.started_completion_checks,
                "started-execution": self.started_tasks,
                "started-waiting": self.started_waiting_tasks,
                "completed-concurrency-tasks": self.finished_concurrency_tasks,
                "running-time": running_time
            })

        finally:
            self._logger.flush()
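
The waiting-list bookkeeping in _enter_waiting_list and _leave_waiting_list relies on a single atomic DynamoDB update per key. Stripped of the retry wrappers it looks like this (the table name is illustrative; key and attribute names are taken from the snippet above):

import boto3

table = boto3.resource("dynamodb").Table("my-stack-ConcurrencyTable")  # illustrative name

# atomically increment the per-key instance counter and read back the new value
resp = table.update_item(
    Key={"ConcurrencyId": "my-action-key"},
    UpdateExpression="ADD InstanceCount :one SET RunNext=:run",
    ExpressionAttributeValues={":one": 1, ":run": False},
    ReturnValues="UPDATED_NEW")
count = int(resp["Attributes"]["InstanceCount"])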
class CliRequestHandler(object):
    """
    Class that handles requests from the admin CLI
    """
    def __init__(self, event, context):
        """
        Initializes the handler instance
        :param event: event to handle
        :param context: lambda context
        """
        self._event = event
        self._context = context
        self._logger = None

        self.additional_parameters = {}

        self.commands = {
            "describe-tasks": "get_tasks" if self.parameters.get("name") is None else "get_task",
            "start-task": "start_task"
        }

        self.attribute_transformations = {}
        self.result_transformations = {}

        # Setup logging
        classname = self.__class__.__name__
        dt = datetime.utcnow()
        logstream = LOG_STREAM.format(classname, dt.year, dt.month, dt.day)
        self._logger = QueuedLogger(logstream=logstream,
                                    buffersize=20,
                                    context=self._context)

    @property
    def action(self):
        """
        Retrieves admin REST api action from the event
        :return: name of the action of the event
        """
        return self._event["action"]

    @property
    def parameters(self):
        params = self._event.get("parameters", {})
        extra = self.additional_parameters.get(self.action, {})
        params.update(extra)

        return {p.replace("-", "_"): params[p] for p in params}

    @classmethod
    def is_handling_request(cls, event, _):
        """
        Returns True if the handler can handle the event
        :param _:
        :param event: tested event
        :return: True if the handler does handle the tested event
        """

        if event.get("source", "") not in [CLI_SOURCE, TEST_SOURCE]:
            return False
        return "action" in event

    def handle_request(self):
        """
        Handles the event
        :return: result of handling the event, sent back to the REST admin API
        """
        def snake_to_pascal_case(s):
            converted = ""
            s = s.strip("_").capitalize()
            i = 0

            while i < len(s):
                if s[i] == "-":
                    pass
                elif s[i] == "_":
                    i += 1
                    converted += s[i].upper()
                else:
                    converted += s[i]
                i += 1

            return converted

        def dict_to_pascal_case(d):

            ps = {}

            if isinstance(d, dict):
                for i in d:
                    key = snake_to_pascal_case(i)
                    ps[key] = dict_to_pascal_case(d[i])
                return ps

            elif isinstance(d, list):
                return [dict_to_pascal_case(l) for l in d]

            return d

        try:
            self._logger.info("Handler {} : Received CLI request \n{}",
                              self.__class__.__name__,
                              safe_json(self._event, indent=3))

            # get access to admin api module
            admin_module = configuration.task_admin_api

            # get api action and map it to a function in the admin API
            fn_name = self.commands.get(self.action, None)
            if fn_name is None:
                raise ValueError("Command {} does not exist".format(
                    self.action))
            fn = getattr(admin_module, fn_name)

            # calling the mapped admin api method
            self._logger.info("Calling \"{}\" with parameters \n{}",
                              fn.__name__, safe_json(self.parameters,
                                                     indent=3))

            args = self.parameters
            args["context"] = self._context
            api_result = fn(**args)

            # convert to awscli PascalCase output format
            result = dict_to_pascal_case(api_result)

            # perform output transformation
            if fn_name in self.result_transformations:
                result = jmespath.search(self.result_transformations[fn_name],
                                         result)

            for t in self.attribute_transformations:
                if t in result:
                    if self.attribute_transformations[t] is not None:
                        result[self.attribute_transformations[t]] = result[t]
                        del result[t]

            # log formatted result
            json_result = safe_json(result, 3)
            self._logger.info("Call result is {}", json_result)

            return result

        except Exception as ex:
            self._logger.info("Call failed, error is {}", str(ex))
            return {"Error": str(ex)}
        finally:
            self._logger.flush()
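
# Minimal usage sketch (illustrative, not part of the original source; the
# handler class name and the task name are hypothetical):
#
#   event = {"source": CLI_SOURCE,
#            "action": "describe-tasks",
#            "parameters": {"name": "nightly-backup"}}
#   handler = CliRequestHandler(event, context=None)
#   if handler.is_handling_request(event, None):
#       result = handler.handle_request()  # maps to task_admin_api.get_task
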
class ScheduleHandler(object):
    """
    Class that handles time based events from CloudWatch rules
    """
    def __init__(self, event, context):
        """
        Initializes the instance.
        :param event: event to handle
        :param context: Lambda context
        """
        self._context = context
        self._event = event
        self._table = None

        # Setup logging
        classname = self.__class__.__name__
        dt = datetime.utcnow()
        logstream = LOG_STREAM.format(classname, dt.year, dt.month, dt.day)
        self._logger = QueuedLogger(logstream=logstream,
                                    buffersize=50,
                                    context=context)

        self.configuration_update = ScheduleHandler.is_config_update(
            self._event)
        if self.configuration_update:
            if "OldImage" in self._event["Records"][0]["dynamodb"]:
                self.updated_task = self._event["Records"][0]["dynamodb"][
                    "OldImage"][configuration.CONFIG_TASK_NAME]["S"]
            else:
                self.updated_task = self._event["Records"][0]["dynamodb"][
                    "NewImage"][configuration.CONFIG_TASK_NAME]["S"]

        self.execute_task_request = self.is_execute_event(self._event)
        self.executed_task_name = event.get(
            handlers.HANDLER_EVENT_TASK_NAME,
            "") if self.execute_task_request else None

    @classmethod
    def is_handling_request(cls, event, _):
        """
        Tests if event is handled by instance of this handler.
        :param _:
        :param event: Tested event
        :return: True if the event is a cloudwatch rule event for scheduling or configuration update
        """
        source = event.get(handlers.HANDLER_EVENT_SOURCE, "")

        if source == "aws.events":
            resources = event.get("resources", [])
            if len(resources) == 1 and resources[0].partition("/")[2].lower(
            ) == os.getenv(handlers.ENV_OPS_AUTOMATOR_RULE).lower():
                return True
            return False

        return ScheduleHandler.is_config_update(
            event) or ScheduleHandler.is_execute_event(event)

    @staticmethod
    def is_config_update(event):
        if event.get("Records", [{}])[0].get("eventSource",
                                             "") != "aws:dynamodb":
            return False

        source_arn = event["Records"][0]["eventSourceARN"]
        table_name = source_arn.split("/")[1]
        return table_name == os.getenv(configuration.ENV_CONFIG_TABLE)

    @staticmethod
    def is_execute_event(event):
        return event.get(
            handlers.HANDLER_EVENT_ACTION,
            "") == handlers.HANDLER_EVENT_SCHEDULER_EXECUTE_TASK and event.get(
                handlers.HANDLER_EVENT_TASK_NAME, None) is not None
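
    # Example (illustrative, not in the original source; the task name is
    # hypothetical): is_execute_event returns True for an event such as
    #   {handlers.HANDLER_EVENT_ACTION: handlers.HANDLER_EVENT_SCHEDULER_EXECUTE_TASK,
    #    handlers.HANDLER_EVENT_TASK_NAME: "nightly-backup"}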

    @property
    def _last_run_table(self):
        """
        Returns table to store last execution time for this handler.
        :return: table to store last execution time for this handler
        """
        if self._table is None:
            self._table = boto3.resource('dynamodb').Table(
                os.environ[handlers.ENV_LAST_RUN_TABLE])
            add_retry_methods_to_resource(self._table,
                                          ["get_item", "update_item"])
        return self._table

    def _get_last_run(self):
        """
        Returns the last UTC datetime this ops automator handler was executed.
        :return: Last datetime this handler was executed in timezone UTC
        """
        # get from table
        resp = self._last_run_table.get_item_with_retries(
            Key={NAME_ATTR: LAST_SCHEDULER_RUN_KEY}, ConsistentRead=True)

        # test if item was in table
        if "Item" in resp:
            return dateutil.parser.parse(resp["Item"]["value"]).replace(
                second=0, microsecond=0)
        else:
            # default for first call is current datetime minus one minute
            return datetime.now(tz=pytz.timezone("UCT")).replace(
                second=0, microsecond=0) - timedelta(minutes=1)

    def _set_last_run(self):
        """
        Stores and returns the current datetime in UTC as the last execution time of this handler.
        :return: Stored last execution time in UTC timezone
        """
        dt = datetime.now(tz=pytz.timezone("UCT")).replace(second=0,
                                                           microsecond=0)
        self._last_run_table.update_item(
            Key={NAME_ATTR: LAST_SCHEDULER_RUN_KEY},
            AttributeUpdates={
                "value": {
                    "Action": "PUT",
                    "Value": dt.isoformat()
                }
            })
        return dt
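
    # The stored item has the shape (illustrative, assuming NAME_ATTR is the
    # key attribute name):
    #   {NAME_ATTR: LAST_SCHEDULER_RUN_KEY, "value": "2019-06-01T12:34:00+00:00"}
    # with seconds and microseconds truncated so runs compare at minute level.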

    def handle_request(self):
        """
        Handles the cloudwatch rule timer event
        :return: Information about started tasks, if any
        """

        start = datetime.now()

        try:
            task_config = TaskConfiguration(context=self._context,
                                            logger=self._logger)
            if not self.execute_task_request:
                result = self.handle_scheduler_tasks(task_config)
            else:
                result = self.handle_execute_task_request(task_config)

            running_time = float((datetime.now() - start).total_seconds())

            self._logger.info(INFO_RESULT, running_time)

            return result
        finally:
            self._logger.flush()

    def handle_scheduler_tasks(self, task_config):

        started_tasks = {}
        start = datetime.now()

        last_run_dt = self._get_last_run()

        self._logger.info(INFO_LAST_SAVED, last_run_dt.isoformat())

        if self.configuration_update:
            self._logger.info(INFO_CONFIG_RUN, self.updated_task)

        current_dt = self._set_last_run()
        already_ran_this_minute = last_run_dt == current_dt
        if already_ran_this_minute and not (self.configuration_update
                                            or self.execute_task_request):
            self._logger.info(INFO_TASK_SCHEDULER_ALREADY_RAN)

        else:

            self._logger.info(INFO_CURRENT_SCHEDULING_DT, current_dt)

            task = None
            enabled_tasks = 0

            next_executed_task = None
            utc = pytz.timezone("UTC")

            tasks = [
                t for t in task_config.get_tasks()
                if t.get(handlers.TASK_INTERVAL) is not None
                and t.get(handlers.TASK_ENABLED, True)
            ]

            try:
                for task in tasks:

                    enabled_tasks += 1

                    self._logger.debug_enabled = task[handlers.TASK_DEBUG]

                    task_name = task[handlers.TASK_NAME]

                    # timezone for specific task
                    task_timezone = pytz.timezone(task[handlers.TASK_TIMEZONE])

                    # create cron expression to test if the task needs to be executed
                    task_cron_expression = CronExpression(
                        expression=task[handlers.TASK_INTERVAL])

                    localized_last_run = last_run_dt.astimezone(task_timezone)
                    localized_current_dt = current_dt.astimezone(task_timezone)

                    next_execution = task_cron_expression.first_within_next(
                        timedelta(hours=24), localized_current_dt)
                    next_execution_utc = next_execution.astimezone(
                        utc).replace(microsecond=0
                                     ) if next_execution is not None else None

                    if next_execution_utc is not None:
                        if next_executed_task is None or next_execution_utc < next_executed_task[
                                0]:
                            next_executed_task = (next_execution_utc, task)

                    if already_ran_this_minute:
                        continue

                    # test if the task needs to be executed since the last run of the ops automator
                    execute_dt_since_last = task_cron_expression.last_since(
                        localized_last_run, localized_current_dt)

                    if execute_dt_since_last is None:
                        if next_execution is not None:
                            next_execution = next_execution.astimezone(
                                task_timezone)
                            self._logger.info(INFO_NEXT_EXECUTION, task_name,
                                              next_execution.isoformat(),
                                              task_timezone)
                        else:
                            self._logger.info(INFO_NO_NEXT_WITHIN, task_name)
                        continue

                    self._logger.info(INFO_SCHEDULED_TASK, task_name,
                                      execute_dt_since_last, task_timezone,
                                      str(safe_json(task, indent=2)))

                    # create an event for the lambda function that starts execution by selecting the resources for this task
                    task_group, sub_tasks = self._execute_task(
                        task, execute_dt_since_last)

                    started_tasks[task_name] = {
                        "task-group": task_group,
                        "sub-tasks": sub_tasks
                    }

                if started_tasks:
                    self._logger.info(INFO_STARTED_TASKS, enabled_tasks,
                                      ",".join(started_tasks))
                else:
                    self._logger.info(INFO_NO_TASKS_STARTED, enabled_tasks)

                self._set_next_schedule_event(current_dt, next_executed_task)

                running_time = float((datetime.now() - start).total_seconds())

                return safe_dict({
                    "datetime": datetime.now().isoformat(),
                    "running-time": running_time,
                    "event-datetime": current_dt.isoformat(),
                    "enabled_tasks": enabled_tasks,
                    "started-tasks": started_tasks
                })

            except ValueError as ex:
                self._logger.error(ERR_SCHEDULE_HANDLER, ex,
                                   safe_json(task, indent=2))

    def handle_execute_task_request(self, task_config):

        self._logger.info(INF_HANDLING_EXEC_REQUEST, self.executed_task_name)

        task_to_execute = task_config.get_task(name=self.executed_task_name)
        if task_to_execute is None:
            raise ValueError(
                "Task with name {} does not exist for stack {}".format(
                    self.executed_task_name,
                    os.getenv(handlers.ENV_STACK_NAME)))

        if not task_to_execute.get(handlers.TASK_ENABLED):
            raise ValueError("Task with name {} is not enabled",
                             self.executed_task_name)

        task_group, sub_tasks = self._execute_task(task_to_execute)
        return safe_dict({
            "datetime": datetime.now().isoformat(),
            "executed-task": self.executed_task_name,
            "task-group": task_group,
            "sub-tasks": sub_tasks
        })

    def _set_next_schedule_event(self, scheduler_dt, next_executed_task):
        """
        Sets the cron expression of the scheduler event rule in cloudwatch depending on next executed task
        :param scheduler_dt: dt used for this scheduler run
        :param next_executed_task: Next task to execute
        :return:
        """
        if next_executed_task is not None:

            utc = pytz.timezone("UTC")

            time_str = "{} ({})".format(next_executed_task[0].isoformat(), utc)
            next_task_tz = pytz.timezone(
                next_executed_task[1][handlers.TASK_TIMEZONE])
            if next_task_tz != utc:
                time_str += ", {} ({})".format(
                    next_executed_task[0].astimezone(next_task_tz),
                    next_task_tz)
            self._logger.info(INFO_NEXT_EXECUTED_TASK,
                              next_executed_task[1][handlers.TASK_NAME],
                              time_str)

            if next_executed_task[0] > scheduler_dt + timedelta(minutes=5):
                next_event_time = handlers.set_event_for_time(
                    next_executed_task[0],
                    task=next_executed_task[1],
                    logger=self._logger,
                    context=self._context)
                self._logger.info(INF_NEXT_EVENT, next_event_time.isoformat())
            else:
                handlers.set_scheduler_rule_every_minute(
                    task=next_executed_task[1])
                self._logger.info(INFO_NEXT_ONE_MINUTE)
        else:
            self._logger.info(INFO_NO_TASKS_SCHEDULED)
            next_event_time = handlers.set_event_for_time(
                scheduler_dt, context=self._context, logger=self._logger)
            self._logger.info(INF_NEXT_EVENT, next_event_time.isoformat())

    @staticmethod
    def task_account_region_sub_tasks(task):
        action_properties = actions.get_action_properties(
            task[handlers.TASK_ACTION])

        aggregation_level = action_properties[actions.ACTION_AGGREGATION]
        # the property may be a lambda function; if so, call it with the task parameters
        if isinstance(aggregation_level, types.FunctionType):
            aggregation_level = aggregation_level(task.get("parameters", {}))

        if aggregation_level == actions.ACTION_AGGREGATION_TASK:
            yield {
                handlers.TASK_THIS_ACCOUNT: task[handlers.TASK_THIS_ACCOUNT],
                handlers.TASK_ACCOUNTS: task[handlers.TASK_ACCOUNTS],
                handlers.TASK_REGIONS: task[handlers.TASK_REGIONS]
            }
        else:
            if task[handlers.TASK_THIS_ACCOUNT]:
                if aggregation_level == actions.ACTION_AGGREGATION_ACCOUNT:
                    yield {
                        handlers.TASK_THIS_ACCOUNT: True,
                        handlers.TASK_ACCOUNTS: [],
                        handlers.TASK_REGIONS: task[handlers.TASK_REGIONS]
                    }
                else:
                    for region in task.get(handlers.TASK_REGIONS, [None]):
                        yield {
                            handlers.TASK_THIS_ACCOUNT: True,
                            handlers.TASK_ACCOUNTS: [],
                            handlers.TASK_REGIONS: [region]
                        }

            for account in task.get(handlers.TASK_ACCOUNTS, []):
                if aggregation_level == actions.ACTION_AGGREGATION_ACCOUNT:
                    yield {
                        handlers.TASK_THIS_ACCOUNT: False,
                        handlers.TASK_ACCOUNTS: [account],
                        handlers.TASK_REGIONS: task[handlers.TASK_REGIONS]
                    }
                else:
                    for region in task.get(handlers.TASK_REGIONS, [None]):
                        yield {
                            handlers.TASK_THIS_ACCOUNT: False,
                            handlers.TASK_ACCOUNTS: [account],
                            handlers.TASK_REGIONS: [region]
                        }
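
    # Example (illustrative, not in the original source): for an action with
    # region-level aggregation, a task with TASK_THIS_ACCOUNT=True,
    # TASK_ACCOUNTS=["111111111111"] and TASK_REGIONS=["us-east-1", "eu-west-1"]
    # yields four sub-tasks, one per account/region combination.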

    def _execute_task(self, task, dt=None, task_group=None):
        """
        Execute a task by starting a lambda function that selects the resources for that action
        :param task: Task started
        :param dt: Task start datetime
        :return:
        """

        debug_state = self._logger.debug_enabled
        self._logger.debug_enabled = task.get(handlers.TASK_DEBUG, False)
        if task_group is None:
            task_group = str(uuid.uuid4())
        try:

            event = {
                handlers.HANDLER_EVENT_ACTION:
                handlers.HANDLER_ACTION_SELECT_RESOURCES,
                handlers.HANDLER_EVENT_TASK:
                task,
                handlers.HANDLER_EVENT_SOURCE:
                "scheduler-handler",
                handlers.HANDLER_EVENT_TASK_DT:
                dt.isoformat()
                if dt is not None else datetime.utcnow().isoformat(),
                handlers.HANDLER_EVENT_TASK_GROUP:
                task_group
            }

            sub_tasks = list(
                ScheduleHandler.task_account_region_sub_tasks(task))
            for sub_task in sub_tasks:

                event[handlers.HANDLER_EVENT_SUB_TASK] = sub_task

                if not handlers.running_local(self._context):

                    if task[handlers.
                            TASK_SELECT_SIZE] != actions.ACTION_USE_ECS:
                        # start lambda function to scan for task resources
                        payload = str.encode(safe_json(event))
                        client = get_client_with_retries("lambda", ["invoke"],
                                                         context=self._context)

                        function_name = "{}-{}-{}".format(
                            os.getenv(handlers.ENV_STACK_NAME),
                            os.getenv(handlers.ENV_LAMBDA_NAME),
                            task[handlers.TASK_SELECT_SIZE])

                        self._logger.info(INFO_RUNNING_LAMBDA, function_name)

                        try:
                            resp = client.invoke_with_retries(
                                FunctionName=function_name,
                                InvocationType="Event",
                                LogType="None",
                                Payload=payload)

                            self._logger.debug(DEBUG_LAMBDA,
                                               resp["StatusCode"], payload)
                        except Exception as ex:
                            self._logger.error(ERR_FAILED_START_LAMBDA_TASK,
                                               str(ex))

                    else:
                        ecs_args = {
                            handlers.HANDLER_EVENT_ACTION:
                            handlers.HANDLER_ACTION_SELECT_RESOURCES,
                            handlers.TASK_NAME: task[handlers.TASK_NAME],
                            handlers.HANDLER_EVENT_SUB_TASK: sub_task
                        }

                        ecs_memory = task.get(handlers.TASK_SELECT_ECS_MEMORY,
                                              None)

                        self._logger.info(INFO_RUNNING_AS_ECS_JOB,
                                          task[handlers.TASK_NAME])
                        handlers.run_as_ecs_job(ecs_args,
                                                ecs_memory_size=ecs_memory,
                                                context=self._context,
                                                logger=self._logger)

                else:
                    if task[handlers.
                            TASK_SELECT_SIZE] == actions.ACTION_USE_ECS:
                        ecs_args = {
                            handlers.HANDLER_EVENT_ACTION:
                            handlers.HANDLER_ACTION_SELECT_RESOURCES,
                            handlers.TASK_NAME: task[handlers.TASK_NAME],
                            handlers.HANDLER_EVENT_SUB_TASK: sub_task
                        }

                        ecs_memory = task.get(handlers.TASK_SELECT_ECS_MEMORY,
                                              None)

                        handlers.run_as_ecs_job(ecs_args,
                                                ecs_memory_size=ecs_memory,
                                                logger=self._logger)
                    else:
                        # or if not running in lambda environment pass event to main task handler
                        lambda_handler(event, self._context)

            return task_group, sub_tasks

        finally:
            self._logger.debug_enabled = debug_state
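
# Minimal usage sketch (illustrative, not part of the original source; the task
# name is hypothetical):
#
#   event = {handlers.HANDLER_EVENT_ACTION:
#                handlers.HANDLER_EVENT_SCHEDULER_EXECUTE_TASK,
#            handlers.HANDLER_EVENT_TASK_NAME: "nightly-backup"}
#   if ScheduleHandler.is_handling_request(event, None):
#       result = ScheduleHandler(event, context=None).handle_request()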
Ejemplo n.º 14
class SelectResourcesHandler(object):
    """
    Class that handles the selection of AWS service resources for a task to perform its action on.
    """
    def __init__(self, event, context, logger=None, tracking_store=None):
        def log_stream_name():

            classname = self.__class__.__name__
            dt = datetime.utcnow()

            account = self._event.get(handlers.HANDLER_SELECT_ARGUMENTS,
                                      {}).get(handlers.HANDLER_EVENT_ACCOUNT,
                                              "")
            regions = self._event.get(handlers.HANDLER_SELECT_ARGUMENTS,
                                      {}).get(handlers.HANDLER_EVENT_REGIONS,
                                              [])

            if account is not None and len(regions) > 0:
                account_and_region = "-".join([account, regions[0]]) + "-"

            else:
                region = ""

                if self.sub_task is not None:
                    account = ""
                    if self._this_account:
                        if len(self._accounts) == 0:
                            account = os.getenv(
                                handlers.ENV_OPS_AUTOMATOR_ACCOUNT)
                    elif len(self._accounts) == 1:
                        account = self._accounts[0]

                    region = self._regions[0] if len(
                        self._regions) == 1 else ""

                if account != "":
                    if region not in ["", None]:
                        account_and_region = "-".join([account, region]) + "-"
                    else:
                        account_and_region = account
                else:
                    account_and_region = ""

            return LOG_STREAM.format(classname, self.task[handlers.TASK_NAME],
                                     account_and_region, dt.year, dt.month,
                                     dt.day)

        self._context = context
        self._event = event
        self.task = event[handlers.HANDLER_EVENT_TASK]
        self.sub_task = event.get(handlers.HANDLER_EVENT_SUB_TASK, None)
        self.use_custom_select = event.get(
            handlers.HANDLER_EVENT_CUSTOM_SELECT, True)

        # the job id is used to correlate all generated tasks for the selected resources
        self.task_group = self._event.get(handlers.HANDLER_EVENT_TASK_GROUP,
                                          None)
        if self.task_group is None:
            self.task_group = str(uuid.uuid4())

        debug = event[handlers.HANDLER_EVENT_TASK].get(handlers.TASK_DEBUG,
                                                       False)
        if logger is None:
            self._logger = QueuedLogger(logstream=log_stream_name(),
                                        context=context,
                                        buffersize=50 if debug else 20,
                                        debug=debug)
        else:
            self._logger = logger

        self._sts = None

        self.select_args = event.get(handlers.HANDLER_SELECT_ARGUMENTS, {})
        self.task_dt = event[handlers.HANDLER_EVENT_TASK_DT]

        self.action_properties = actions.get_action_properties(
            self.task[handlers.TASK_ACTION])
        self.action_class = actions.get_action_class(
            self.task[handlers.TASK_ACTION])
        self.task_parameters = self.task.get(handlers.TASK_PARAMETERS, {})
        self.metrics = self.task.get(handlers.TASK_METRICS, False)

        self.service = self.action_properties[actions.ACTION_SERVICE]
        self.keep_tags = self.action_properties.get(
            actions.ACTION_KEEP_RESOURCE_TAGS, True)

        self.source = self._event.get(handlers.HANDLER_EVENT_SOURCE,
                                      handlers.UNKNOWN_SOURCE)
        self.run_local = handlers.running_local(self._context)
        self._timer = None
        self._timeout_event = threading.Event()

        self.aggregation_level = self.action_properties.get(
            actions.ACTION_AGGREGATION, actions.ACTION_AGGREGATION_RESOURCE)
        if self.aggregation_level is not None and isinstance(
                self.aggregation_level, types.FunctionType):
            self.aggregation_level = self.aggregation_level(
                self.task_parameters)

        self.batch_size = self.action_properties.get(actions.ACTION_BATCH_SIZE)
        if self.batch_size is not None and isinstance(self.batch_size,
                                                      types.FunctionType):
            self.batch_size = self.batch_size(self.task_parameters)

        self.actions_tracking = TaskTrackingTable(
            self._context,
            logger=self._logger) if tracking_store is None else tracking_store

    @classmethod
    def is_handling_request(cls, event, _):
        """
        Tests if this handler handles the event.
        :param _:
        :param event: The event to test
        :return: True if the event is handled by this handler
        """
        return event.get(handlers.HANDLER_EVENT_ACTION,
                         "") == handlers.HANDLER_ACTION_SELECT_RESOURCES

    @property
    def _task_tag(self):
        """
        Returns the name of the tag that contains the list of tasks for a resource.
        :return: The name of the tag that contains the list of tasks for a resource
        """
        name = os.environ.get(handlers.ENV_AUTOMATOR_TAG_NAME)
        if name is None:
            name = handlers.DEFAULT_SCHEDULER_TAG
        return name

    @property
    def sts(self):
        if self._sts is None:
            self._sts = boto3.client("sts")
        return self._sts

    @property
    def _resource_name(self):
        name = self.action_properties[actions.ACTION_RESOURCES]
        if name in [None, ""]:
            name = self._event.get(handlers.HANDLER_SELECT_ARGUMENTS, {}).get(
                handlers.HANDLER_EVENT_RESOURCE_NAME, "")
        return name

    def _check_can_execute(self, selected_resources):
        """
        Checks if the action for the task can be executed with the selected resources
        :param selected_resources: resources selected for the task
        :return: True if the action can be executed with the selected resources
        """
        check_method = getattr(self.action_class, actions.CHECK_CAN_EXECUTE,
                               None)
        if check_method:
            try:
                check_method(selected_resources, self.task_parameters)
                return True
            except ValueError as ex:
                self._logger.error(ERR_CAN_NOT_EXECUTE_WITH_THESE_RESOURCES,
                                   self.task[handlers.TASK_ACTION],
                                   self.task[handlers.TASK_NAME], str(ex))
                return False
        return True

    def _task_assumed_roles(self):
        """
        Yields the assumed role for each handled account
        :return:
        """

        # account can optionally be passed in by events
        account = self._event.get(handlers.HANDLER_SELECT_ARGUMENTS,
                                  {}).get(handlers.HANDLER_EVENT_ACCOUNT)

        if account is not None:
            assumed_role = handlers.get_account_role(account=account,
                                                     task=self.task,
                                                     logger=self._logger)
            if assumed_role is None:
                if account != os.getenv(handlers.ENV_OPS_AUTOMATOR_ACCOUNT):
                    self._logger.error(ERR_ACCOUNT_SKIPPED_NO_ROLE, account)
                yield None
            else:
                yield assumed_role

        else:
            # no role if processing scheduled task in own account
            if self._this_account:
                assumed_role = handlers.get_account_role(account=os.getenv(
                    handlers.ENV_OPS_AUTOMATOR_ACCOUNT),
                                                         task=self.task,
                                                         logger=self._logger)
                yield assumed_role

            for acct in self._accounts:
                # for external accounts
                assumed_role = handlers.get_account_role(account=acct,
                                                         task=self.task,
                                                         logger=self._logger)
                if assumed_role is not None:
                    yield assumed_role

    @property
    def _this_account(self):
        if self.sub_task is not None:
            return self.sub_task[handlers.TASK_THIS_ACCOUNT]
        return self.task.get(handlers.TASK_THIS_ACCOUNT, True)

    @property
    def _accounts(self):
        if self.sub_task is not None:
            return self.sub_task[handlers.TASK_ACCOUNTS]
        return self.task.get(handlers.TASK_ACCOUNTS, [])

    @property
    def _regions(self):
        """
        Returns the regions in which resources are selected
        :return:
        """
        regions = self._event.get(handlers.HANDLER_SELECT_ARGUMENTS,
                                  {}).get(handlers.HANDLER_EVENT_REGIONS)
        if regions is None:
            regions = self.sub_task[
                handlers.
                TASK_REGIONS] if self.sub_task is not None else self.task.get(
                    handlers.TASK_REGIONS, [None])
        else:
            # check if the regions in the event are in the task configuration's regions
            task_regions = self.task.get(handlers.TASK_REGIONS, [])
            checked_regions = [r for r in regions if r in task_regions]
            if len(checked_regions) != len(regions):
                self._logger.warning(WARN_REGION_NOT_IN_TASK_CONFIGURATION,
                                     self._event)
                return checked_regions
        return regions if len(regions) > 0 else [None]
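
    # Example (illustrative, not in the original source): with no regions in the
    # event, a sub-task with TASK_REGIONS ["us-east-1"] selects in that region
    # only; an empty region list yields [None], in which case the describe call
    # is made without an explicit region.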

    def handle_request(self):
        """
        Handles the select resources request. Creates new actions for resources found for a task
        :return: Results of handling the request
        """
        def filter_by_action_filter(srv, used_role, r):
            filter_method = getattr(self.action_class,
                                    actions.SELECT_AND_PROCESS_RESOURCE_METHOD,
                                    None)
            if filter_method is not None:
                r = filter_method(srv, self._logger, self._resource_name, r,
                                  self._context, self.task, used_role)
                if r is None:
                    self._logger.debug(
                        DEBUG_FILTER_METHOD, self.action_class.__name__,
                        actions.SELECT_AND_PROCESS_RESOURCE_METHOD)
                    return None
                else:
                    self._logger.debug(
                        DEBUG_FILTERED_RESOURCE, self.action_class.__name__,
                        actions.SELECT_AND_PROCESS_RESOURCE_METHOD,
                        safe_json(r, indent=3))

            return r

        def is_selected_resource(aws_service, resource, used_role, taskname,
                                 tags_filter, does_resource_supports_tags):

            # No tag support, so just use the filter method, if any
            if not does_resource_supports_tags:
                self._logger.debug(DEBUG_RESOURCE_NO_TAGS, resource)
                return filter_by_action_filter(srv=aws_service,
                                               used_role=used_role,
                                               r=resource)

            tags = resource.get("Tags", {})

            # name of the tag that holds the list of tasks for this resource
            tagname = self._task_tag

            if tags_filter is None:
                # test if name of the task is in list of tasks in tag value
                if (tagname not in tags) or (taskname
                                             not in tagging.split_task_list(
                                                 tags[tagname])):
                    self._logger.debug(
                        DEBUG_RESOURCE_NOT_SELECTED,
                        safe_json(resource, indent=2), taskname, ','.join(
                            ["'{}'='{}'".format(t, tags[t]) for t in tags]))
                    return None
                self._logger.debug(DEBUG_SELECTED_BY_TASK_NAME_IN_TAG_VALUE,
                                   safe_json(resource, indent=2), tagname,
                                   taskname)
            else:
                # using a tag filter, * means any tag
                if tags_filter != tagging.tag_filter_set.WILDCARD_CHAR:
                    # test if there are any tags matching the tag filter
                    if not TagFilterExpression(tags_filter).is_match(tags):
                        self._logger.debug(
                            DEBUG_RESOURCE_NOT_SELECTED_TAG_FILTER,
                            safe_json(resource, indent=2), taskname, ','.join([
                                "'{}'='{}'".format(t, tags[t]) for t in tags
                            ]))
                        return None
                    self._logger.debug(DEBUG_SELECTED_BY_TAG_FILTER,
                                       safe_json(resource, indent=2), tags,
                                       tags_filter, taskname)
                else:
                    self._logger.debug(DEBUG_SELECTED_WILDCARD_TAG_FILTER,
                                       safe_json(resource, indent=2), taskname)
                    return filter_by_action_filter(srv=aws_service,
                                                   used_role=used_role,
                                                   r=resource)

            return filter_by_action_filter(srv=aws_service,
                                           used_role=used_role,
                                           r=resource)
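
        # Example (illustrative, not in the original source; the tag name is
        # hypothetical and split_task_list is assumed to split comma-separated
        # lists): with task tag "OpsAutomatorTaskList" and no tag filter, a
        # resource tagged {"OpsAutomatorTaskList": "task-a,task-b"} is selected
        # for task "task-a" but not for task "task-c".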

        def resource_batches(resources):
            """
            Returns resources in chunks of batch-size items. If the action class has an optional custom aggregation
            function then the resources are aggregated first using that function before the batch size is applied
            :param resources: resources to process
            :return: Generator for blocks of resource items
            """

            aggregate_func = getattr(self.action_class,
                                     actions.CUSTOM_AGGREGATE_METHOD, None)

            for i in aggregate_func(
                    resources, self.task_parameters,
                    self._logger) if aggregate_func is not None else [
                        resources
                    ]:
                if self.batch_size is None:
                    yield i
                else:
                    first = 0
                    while first < len(i):
                        yield i[first:first + self.batch_size]
                        first += self.batch_size
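
        # Example (illustrative, not in the original source): with
        # batch_size = 2 and no custom aggregation method,
        # resource_batches([r1, r2, r3]) yields [r1, r2] and then [r3].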

        def setup_tag_filtering(t_name):
            # get optional tag filter
            no_select_by_tags = self.action_properties.get(
                actions.ACTION_NO_TAG_SELECT, False)
            if no_select_by_tags:
                tag_filter_string = tagging.tag_filter_set.WILDCARD_CHAR
            else:
                tag_filter_string = self.task.get(handlers.TASK_TAG_FILTER)

            # set if only a single tag is required for selecting the resources; it is used to optimize the select
            select_tag = None
            if tag_filter_string is None:
                self._logger.debug(DEBUG_SELECT_BY_TASK_NAME,
                                   self._resource_name, self._task_tag, t_name)
                select_tag = self._task_tag
            elif tag_filter_string == tagging.tag_filter_set.WILDCARD_CHAR:
                self._logger.debug(DEBUG_SELECT_ALL_RESOURCES,
                                   self._resource_name)
            else:
                self._logger.debug(DEBUG_TAG_FILTER_USED_TO_SELECT_RESOURCES,
                                   self._resource_name)
                # build the tag expression that is used to filter the resources
                tag_filter_expression = TagFilterExpression(tag_filter_string)
                # the keys of the used tags
                tag_filter_expression_tag_keys = list(
                    tag_filter_expression.get_filter_keys())
                # if there is only a single tag then we can optimize by just filtering on that specific tag
                if len(tag_filter_expression_tag_keys) == 1 and \
                        tagging.tag_filter_set.WILDCARD_CHAR not in tag_filter_expression_tag_keys[0]:
                    select_tag = tag_filter_expression_tag_keys[0]
            return select_tag, tag_filter_string
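
        # Example (illustrative, not in the original source): a tag filter of
        # "environment=prod" has the single key "environment", so the select is
        # optimized by filtering on that tag; a filter of "*" selects all
        # resources and returns no select tag.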

        def add_aggregated(aggregated_resources):
            # create task actions for aggregated resources, optionally split into batch-size chunks
            for ra in resource_batches(aggregated_resources):
                if self._check_can_execute(ra):
                    action_item = self.actions_tracking.add_task_action(
                        task=self.task,
                        assumed_role=assumed_role,
                        action_resources=ra,
                        task_datetime=self.task_dt,
                        source=self.source,
                        task_group=self.task_group)

                    self._logger.debug(DEBUG_ADDED_AGGREGATED_RESOURCES_TASK,
                                       action_item[handlers.TASK_TR_ID],
                                       len(ra), self._resource_name,
                                       self.task[handlers.TASK_NAME])

                    self._logger.debug("Added item\n{}",
                                       safe_json(action_item, indent=3))

                    yield action_item

        def add_as_individual(resources):
            for ri in resources:
                # task action for each selected resource
                if self._check_can_execute([ri]):
                    action_item = self.actions_tracking.add_task_action(
                        task=self.task,
                        assumed_role=assumed_role,
                        action_resources=ri,
                        task_datetime=self.task_dt,
                        source=self.source,
                        task_group=self.task_group)

                    self._logger.debug(DEBUG_ADD_SINGLE_RESOURCE_TASK,
                                       action_item[handlers.TASK_TR_ID],
                                       self._resource_name,
                                       self.task[handlers.TASK_NAME])
                    self._logger.debug("Added item\n{}",
                                       safe_json(action_item, indent=3))
                    yield action_item

        try:
            task_items = []
            start = datetime.now()

            self._logger.debug(DEBUG_EVENT, safe_json(self._event, indent=3))
            self._logger.debug(DEBUG_ACTION,
                               safe_json(self.action_properties, indent=3))

            self._logger.info(INFO_SELECTED_RESOURCES, self._resource_name,
                              self.service, self.task[handlers.TASK_NAME])
            self._logger.info(INFO_AGGR_LEVEL, self.aggregation_level)

            task_level_aggregated_resources = []

            args = self._build_describe_argument()

            service_resource_with_tags = services.create_service(
                self.service).resources_with_tags
            if self._resource_name == "":
                supports_tags = len(service_resource_with_tags) != 0
            else:
                supports_tags = self._resource_name.lower() in [
                    r.lower() for r in service_resource_with_tags
                ]

            args["tags"] = supports_tags
            self._logger.info(INFO_USE_TAGS_TO_SELECT,
                              "R" if supports_tags else "No r")

            task_name = self.task[handlers.TASK_NAME]
            count_resource_items = 0
            selected_resource_items = 0

            select_on_tag, tag_filter_str = setup_tag_filtering(task_name)

            filter_func = getattr(self.action_class,
                                  actions.FILTER_RESOURCE_METHOD, None)

            # timer to guard selection time and log warning if getting close to lambda timeout
            if self._context is not None:
                self.start_timer(REMAINING_TIME_AFTER_DESCRIBE)
            try:

                for assumed_role in self._task_assumed_roles():
                    retry_strategy = get_default_retry_strategy(
                        service=self.service, context=self._context)

                    service = services.create_service(
                        service_name=self.service,
                        service_retry_strategy=retry_strategy,
                        role_arn=assumed_role)

                    if self.is_timed_out():
                        break

                    # contains resources for account
                    account_level_aggregated_resources = []

                    self._logger.info(INFO_ACCOUNT, service.aws_account)
                    if assumed_role not in [None, ""]:
                        self._logger.info(INFO_ASSUMED_ROLE, assumed_role)

                    for region in self._regions:

                        # test for timeouts
                        if self.is_timed_out():
                            break

                        # handle region passed in the event
                        if region is not None:
                            args["region"] = region
                        else:
                            if "region" in args:
                                del args["region"]

                        # resources can be passed in the event by event handlers
                        all_resources = self._event.get(
                            handlers.HANDLER_SELECT_RESOURCES, None)

                        if all_resources is None:

                            # actions can have an optional method to select resources
                            action_custom_describe_function = getattr(
                                self.action_class, "describe_resources", None)
                            if action_custom_describe_function is not None and self.use_custom_select:
                                all_resources = action_custom_describe_function(
                                    service, self.task, region)
                            else:
                                # select resources from the service
                                self._logger.debug(DEBUG_SELECT_PARAMETERS,
                                                   self._resource_name,
                                                   self.service, args)
                                # selecting a list of all resources in this account/region
                                all_resources = list(
                                    service.describe(
                                        self._resource_name,
                                        filter_func=filter_func,
                                        select_on_tag=select_on_tag,
                                        **args))
                            # test for timeout
                            if self.is_timed_out():
                                break

                            count_resource_items += len(all_resources)

                        self._logger.info(INFO_RESOURCES_FOUND,
                                          len(all_resources))

                        # select resources that are processed by the task
                        selected_resources = []
                        for sr in all_resources:
                            sel = is_selected_resource(
                                aws_service=service,
                                resource=sr,
                                used_role=assumed_role,
                                taskname=task_name,
                                tags_filter=tag_filter_str,
                                does_resource_supports_tags=supports_tags)
                            if sel is not None:
                                selected_resources.append(sel)

                        selected_resource_items += len(selected_resources)

                        # display found and selected resources
                        if len(all_resources) > 0:
                            self._logger.info(INFO_RESOURCES_SELECTED,
                                              len(selected_resources))
                            if len(selected_resources) == 0:
                                continue

                        # delete tags if not needed by the action
                        if not self.keep_tags:
                            for res in selected_resources:
                                if "Tags" in res:
                                    del res["Tags"]

                        # add resources to total list of resources for this task
                        if self.aggregation_level == actions.ACTION_AGGREGATION_TASK:
                            task_level_aggregated_resources += selected_resources

                        # add resources to list of resources for this account
                        if self.aggregation_level == actions.ACTION_AGGREGATION_ACCOUNT:
                            account_level_aggregated_resources += selected_resources

                        # add batch(es) of resources for this region
                        if self.aggregation_level == actions.ACTION_AGGREGATION_REGION and len(
                                selected_resources) > 0:
                            task_items += list(
                                add_aggregated(selected_resources))

                        # no aggregation, add each individual resource
                        if self.aggregation_level == actions.ACTION_AGGREGATION_RESOURCE and len(
                                selected_resources) > 0:
                            task_items += list(
                                add_as_individual(selected_resources))

                    # at the end of the region loop, check if aggregated resources for account need to be added
                    if self.aggregation_level == actions.ACTION_AGGREGATION_ACCOUNT and len(
                            account_level_aggregated_resources) > 0:
                        task_items += list(
                            add_aggregated(account_level_aggregated_resources))

                # at the end of the accounts loop, check if aggregated resources for task need to be added
                if self.aggregation_level == actions.ACTION_AGGREGATION_TASK and len(
                        task_level_aggregated_resources) > 0:
                    task_items += list(
                        add_aggregated(task_level_aggregated_resources))
            except Exception as ex:
                raise_exception(ERR_SELECTING_TASK_RESOURCES,
                                self.task[handlers.TASK_NAME], ex)

            finally:
                if self._timer is not None:
                    # cancel the timer used to avoid timeouts when selecting resources
                    self._timer.cancel()
                    if self.is_timed_out():
                        raise_exception(ERR_TIMEOUT_SELECTING_RESOURCES,
                                        self._resource_name, self.service,
                                        task_name)

                    self.start_timer(REMAINING_TIME_AFTER_STORE)

                    self.actions_tracking.flush(self._timeout_event)
                    if self.is_timed_out():
                        raise_exception(
                            ERR_CREATING_TASKS_FOR_SELECTED_RESOURCES,
                            task_name)
                    self._timer.cancel()
                else:
                    self.actions_tracking.flush()

            self._logger.info(INFO_ADDED_ITEMS, len(task_items),
                              self.task[handlers.TASK_NAME])

            running_time = float((datetime.now() - start).total_seconds())
            self._logger.info(INFO_RESULT, running_time)

            if self.metrics:
                put_task_select_data(task_name=task_name,
                                     items=count_resource_items,
                                     selected_items=selected_resource_items,
                                     logger=self._logger,
                                     selection_time=running_time)

            return safe_dict({
                "datetime": datetime.now().isoformat(),
                "running-time": running_time,
                "dispatched-tasks": task_items
            })

        finally:
            self._logger.flush()

    def select_timed_out(self):
        """
        Function is called when the handling of the request times out
        :return:
        """
        time_used = int(os.getenv(handlers.ENV_LAMBDA_TIMEOUT, 900)) - int(
            (self._context.get_remaining_time_in_millis() / 1000))
        self._logger.error(ERR_TIMEOUT_SELECT_OR_STORE, time_used,
                           self.task[handlers.TASK_NAME])
        self._timeout_event.set()
        self._logger.flush()
        self._timer.cancel()

    def start_timer(self, remaining):
        execution_time_left = (self._context.get_remaining_time_in_millis() /
                               1000.00) - remaining
        self._timer = threading.Timer(execution_time_left,
                                      self.select_timed_out)
        self._timer.start()

    def is_timed_out(self):
        return self._timeout_event is not None and self._timeout_event.is_set()
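
    # Illustrative timing (values are hypothetical): with 300 seconds of Lambda
    # execution time left and REMAINING_TIME_AFTER_DESCRIBE = 45, start_timer(45)
    # schedules select_timed_out after 255 seconds; the select loops poll
    # is_timed_out and abort before the Lambda itself is killed.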

    def _build_describe_argument(self):
        """
        Builds the arguments for the describe call that selects the resources
        :return: arguments for describe call
        """
        args = {}
        # get the mapping for parameters that should be used as parameters to the describe method call to select the resources
        action_parameters = self.action_properties.get(
            actions.ACTION_PARAMETERS, {})
        for p in [
                p for p in action_parameters if action_parameters[p].get(
                    actions.PARAM_DESCRIBE_PARAMETER) is not None
        ]:

            if self.task_parameters.get(p) is not None:
                args[action_parameters[p]
                     [actions.
                      PARAM_DESCRIBE_PARAMETER]] = self.task_parameters[p]

        # also add describe method parameters specified as select parameters in the metadata of the action
        select_parameters = self.action_properties.get(
            actions.ACTION_SELECT_PARAMETERS, {})
        if isinstance(select_parameters, types.FunctionType):
            select_parameters = select_parameters(self.task,
                                                  self.task_parameters)
        for p in select_parameters:
            args[p] = select_parameters[p]

        # region and account are separate describe parameters
        args.update({
            a: self.select_args[a]
            for a in self.select_args if a not in [
                handlers.HANDLER_EVENT_REGIONS, handlers.HANDLER_EVENT_ACCOUNT,
                handlers.HANDLER_EVENT_RESOURCE_NAME
            ]
        })
        # action-specified JMESPath select expression for the resources
        if actions.ACTION_SELECT_EXPRESSION in self.action_properties:
            # replace parameter placeholders with their values; str.format cannot be used here because the JMESPath
            # expression may itself contain {} for projecting attributes, so parameters use %paramname% placeholders instead
            jmes = self.action_properties[actions.ACTION_SELECT_EXPRESSION]
            for p in self.task_parameters:
                jmes = jmes.replace("%{}%".format(p),
                                    str(self.task_parameters[p]))
            args["select"] = jmes
        return args
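
The %paramname% substitution above can be illustrated with a small,
self-contained sketch; the parameter name and the JMESPath expression here are
hypothetical:

# Placeholders wrapped in % are replaced with parameter values; JMESPath's own
# {} syntax for multi-select hashes is left untouched, which is why str.format
# cannot be used for this substitution.
task_parameters = {"MinSize": 2}
jmes = "Volumes[?Size>=`%MinSize%`].{id:VolumeId,size:Size}"
for p in task_parameters:
    jmes = jmes.replace("%{}%".format(p), str(task_parameters[p]))
assert jmes == "Volumes[?Size>=`2`].{id:VolumeId,size:Size}"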
Ejemplo n.º 15
def _get_logger(context):
    dt = datetime.utcnow()
    logstream = LOG_STREAM.format(dt.year, dt.month, dt.day)
    return QueuedLogger(logstream=logstream, buffersize=10, context=context)
Ejemplo n.º 16
class ConfigurationResourceHandler(CustomResource):
    def __init__(self, event, context):
        CustomResource.__init__(self, event, context)

        # copy the resource properties, dropping the CloudFormation bookkeeping entries
        self.arguments = {
            a: self.resource_properties[a]
            for a in self.resource_properties
            if a not in ["ServiceToken", "Timeout"]
        }
        # setup logging
        dt = datetime.utcnow()
        classname = self.__class__.__name__
        logstream = LOG_STREAM.format(classname, dt.year, dt.month, dt.day)
        self._logger = QueuedLogger(logstream=logstream,
                                    context=context,
                                    buffersize=20)

    @classmethod
    def is_handling_request(cls, event, _):
        return event.get("StackId") is not None and event.get(
            "ResourceType") == "Custom::TaskConfig"

    def handle_request(self):

        start = datetime.now()

        self._logger.info("Cloudformation request is {}",
                          safe_json(self._event, indent=2))

        try:
            result = CustomResource.handle_request(self)

            return safe_dict({
                "datetime": datetime.now().isoformat(),
                "running-time": (datetime.now() - start).total_seconds(),
                "result": result
            })

        finally:
            self._logger.flush()

    def _create_request(self):

        name = self.resource_properties[CONFIG_TASK_NAME]
        try:
            self._logger.info("Creating new Task resource with name {}", name)
            self.physical_resource_id = name
            self.task = create_task(**self.arguments)
            self._logger.info(
                "Created new resource with physical resource id {}",
                self.physical_resource_id)
            return True

        except Exception as ex:
            self.response["Reason"] = str(ex)
            self._logger.error(ERR_CREATING_TASK_, name, ex)
            return False

    def _update_request(self):

        self._logger.info("Updating Task resource")
        name = self.resource_properties.get(CONFIG_TASK_NAME)
        try:
            if name is None:
                raise_exception(ERR_NO_TASK_NAME_RESOURCE_PROPERTY)

            if name != self.physical_resource_id:
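                # Returning a new physical resource id makes CloudFormation treat
                # the update as a replacement: after the stack update succeeds it
                # sends a Delete request for the resource with the old id.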
                self._logger.info(
                    "Name change for resource with physical resource id {}, new value is {}",
                    self.physical_resource_id, name)
                self.arguments[CONFIG_TASK_NAME] = name
                create_task(**self.arguments)
                self.physical_resource_id = name
                self._logger.info(
                    "Created new resource with physical resource id {}",
                    self.physical_resource_id)
            else:
                update_task(name, **self.arguments)
                self._logger.info(
                    "Updated resource with physical resource id {}",
                    self.physical_resource_id)
            return True

        except Exception as ex:
            self.response["Reason"] = str(ex)
            self._logger.error(ERR_UPDATING_TASK, name, ex)
            return False

    def _delete_request(self):

        self._logger.info("Deleting Task resource")
        name = self.resource_properties.get(CONFIG_TASK_NAME)
        try:
            self._logger.info("Task name is {}, physical resource id is {}",
                              name, self.physical_resource_id)
            # The task can be part of a different stack than the scheduler stack that owns the configuration
            # table, so that table may already have been deleted; check that it still exists before deleting the item
            if TaskConfiguration.config_table_exists():
                delete_task(self.physical_resource_id)
                self._logger.info(
                    "Deleted resource {} with physical resource id {}", name,
                    self.physical_resource_id)
            else:
                self._logger.info(
                    "Configuration table no longer exists, skipping deletion of the item")
            return True

        except Exception as ex:
            self.response["Reason"] = str(ex)
            self._logger.error(ERR_DELETING_TASK, name, ex)
            return False


class CompletionHandler(object):
    """
    Class that handles time-based events from CloudWatch rules
    """
    def __init__(self, event, context):
        """
        Initializes the instance.
        :param event: event to handle
        :param context: Lambda context
        """
        self._context = context
        self._event = event
        self._table = None

        # Setup logging
        classname = self.__class__.__name__
        dt = datetime.utcnow()
        logstream = LOG_STREAM.format(classname, dt.year, dt.month, dt.day)
        self._logger = QueuedLogger(logstream=logstream,
                                    buffersize=20,
                                    context=context)

    @classmethod
    def is_handling_request(cls, event, _):
        """
        Tests if event is handled by instance of this handler.
        :param _:
        :param event: Tested event
        :return: True if the event is a cloudwatch rule event for task completion
        """
        source = event.get(handlers.HANDLER_EVENT_SOURCE, "")
        if source != "aws.events":
            return False

        resources = event.get("resources", [])
        if len(resources) != 1:
            return False
        rule_name = resources[0].partition("/")[2].lower()
        return rule_name == os.getenv(handlers.ENV_COMPLETION_RULE, "").lower()
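
    # A sketch (assumed event shape) of a scheduled CloudWatch rule event this
    # test would accept, assuming handlers.HANDLER_EVENT_SOURCE is the standard
    # "source" key and ENV_COMPLETION_RULE is set to "completion-rule":
    #
    #   {
    #       "source": "aws.events",
    #       "resources": ["arn:aws:events:us-east-1:111122223333:rule/completion-rule"]
    #   }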

    def handle_request(self):
        """
        Handles the cloudwatch rule timer event
        :return: Started tasks, if any, information
        """

        try:

            start = datetime.now()

            count = 0
            tracking_table = TaskTrackingTable(context=self._context,
                                               logger=self._logger)

            for task in tracking_table.get_tasks_to_check_for_completion():
                count += 1

                task_id = task[handlers.TASK_TR_ID]
                last_check_for_completion_time = datetime.now().isoformat()
                tracking_table.update_task(
                    task_id,
                    task=task.get(handlers.TASK_TR_NAME, None),
                    task_metrics=task.get(handlers.TASK_TR_METRICS, False),
                    status_data={
                        handlers.TASK_TR_LAST_WAIT_COMPLETION:
                        last_check_for_completion_time
                    })

                self._logger.debug("Task is {}", task)
                self._logger.info(INF_SET_COMPLETION_TASK_TIMER,
                                  task.get(handlers.TASK_TR_NAME, None),
                                  task_id, last_check_for_completion_time)

            running_time = float((datetime.now() - start).total_seconds())
            self._logger.info(INF_COMPLETION_ITEMS_SET, running_time, count)

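            # With nothing left to check, disable the completion rule so this
            # Lambda is not invoked needlessly; presumably the rule is enabled
            # again when new tasks start waiting for completion.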
            if count == 0 and not handlers.running_local(self._context):
                rule = handlers.disable_completion_cloudwatch_rule(
                    self._context)
                self._logger.info(INF_DISABLED_COMPLETION_TIMER, rule)

            return safe_dict({
                "datetime": datetime.now().isoformat(),
                "running-time": running_time,
                "tasks-to-check": count
            })

        except ValueError as ex:
            self._logger.error(ERR_COMPLETION_HANDLER, ex,
                               safe_json(self._event, indent=2))

        finally:
            self._logger.flush()
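
All handlers in these examples follow the same convention: a classmethod
is_handling_request that tests whether an event belongs to the handler, and an
instance method handle_request that processes it. A minimal dispatch sketch
under that assumption; the function name and handler list are hypothetical:

def dispatch_event(event, context, handler_classes):
    # The first handler class that accepts the event processes it.
    for cls in handler_classes:
        if cls.is_handling_request(event, context):
            return cls(event, context).handle_request()
    raise ValueError("no handler accepts this event")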