コード例 #1
0
class AlertMerger(object):
    """Find pending alerts in the alerts table and forward them to the alert processor."""
    # Cached AlertMerger so that later invocations in the same Lambda container can re-use it
    ALERT_MERGER = None

    # Async invocations of Lambda functions are capped at 128KB.
    # Stay slightly under that cap to leave room for the rest of the message.
    MAX_LAMBDA_PAYLOAD_SIZE = 126000

    @classmethod
    def get_instance(cls):
        """Return the cached AlertMerger, creating (and caching) one if none exists yet."""
        cached = cls.ALERT_MERGER
        if not cached:
            cached = cls.ALERT_MERGER = AlertMerger()
        return cached

    def __init__(self):
        """Read settings from environment variables and create the table and Lambda clients.

        Raises:
            KeyError: If ALERTS_TABLE, ALERT_PROCESSOR or ALERT_PROCESSOR_TIMEOUT_SEC is unset.
            ValueError: If ALERT_PROCESSOR_TIMEOUT_SEC is not an integer.
        """
        environ = os.environ
        self.table = AlertTable(environ['ALERTS_TABLE'])
        self.alert_proc = environ['ALERT_PROCESSOR']
        self.alert_proc_timeout = int(environ['ALERT_PROCESSOR_TIMEOUT_SEC'])
        self.lambda_client = boto3.client('lambda')

    def _get_alerts(self, rule_name):
        """Load the alerts stored for a rule, skipping records which cannot be parsed.

        Args:
            rule_name (str): Name of the rule whose alert records should be fetched.

        Returns:
            list<Alert>: One Alert per valid record. Invalid records are logged and dropped.
        """
        # The processor timeout is forwarded to the table query as-is.
        records = self.table.get_alert_records(rule_name, self.alert_proc_timeout)

        parsed = []
        for item in records:
            try:
                created = Alert.create_from_dynamo_record(item)
            except AlertCreationError:
                LOGGER.exception('Invalid alert record %s', item)
                continue
            parsed.append(created)

        return parsed

    @staticmethod
    def _merge_groups(alerts):
        """Partition alerts into groups which can each be merged and sent right now.

        Args:
            alerts (list): List of Alert instances with defined merge configuration.

        Returns:
            list<AlertMergeGroup>: Merge groups, in order of creation. Per the merge design:
                (1) The oldest alert in each group is older than its merge window, AND
                (2) All alerts in a group fit within a single merge window, AND
                (3) All alerts in a group have the same values for all of their merge keys.

            Alerts which are too recent to fit in any merge group are excluded from the results.
        """
        groups = []

        # Walk the alerts in sorted order, offering each one to the existing groups in turn.
        for candidate in sorted(alerts):
            accepted = False
            for existing in groups:
                if existing.add(candidate):
                    # Stop at the first group that takes the alert.
                    accepted = True
                    break
            if accepted:
                continue

            # No existing group accepted the alert - it may only seed a new group
            # once its own merge window has elapsed.
            if datetime.utcnow() < candidate.created + candidate.merge_window:
                # This alert is too recent - no other alerts can be merged. Stop here.
                break
            groups.append(AlertMergeGroup(candidate))

        return groups

    def _dispatch_alert(self, alert):
        """Asynchronously invoke the alert processor for one alert and record the dispatch.

        Args:
            alert (Alert): Alert to send. Its attempts counter and dispatched time are updated.
        """
        alert.attempts += 1
        LOGGER.info('Dispatching %s to %s (attempt %d)', alert, self.alert_proc, alert.attempts)
        MetricLogger.log_metric(
            ALERT_MERGER_NAME, MetricLogger.ALERT_ATTEMPTS, alert.attempts)

        # Compact separators keep the serialized record as small as possible.
        full_record = json.dumps(
            alert.dynamo_record(), cls=Alert.AlertEncoder, separators=(',', ':'))

        if len(full_record) > self.MAX_LAMBDA_PAYLOAD_SIZE:
            # The alert is too big - the alert processor will have to pull it from Dynamo
            payload = json.dumps(alert.dynamo_key)
        else:
            # The entire alert fits in the Lambda payload - send it all
            payload = full_record

        self.lambda_client.invoke(
            FunctionName=self.alert_proc,
            InvocationType='Event',
            Payload=payload,
            Qualifier='production')

        alert.dispatched = datetime.utcnow()
        self.table.mark_as_dispatched(alert)

    def dispatch(self):
        """Find and dispatch all pending alerts to the alert processor."""
        # Table writes are batched until the end to reduce the number of Dynamo API calls.
        new_merged = []  # Merged alerts created during this run
        stale = []  # Alerts which are no longer needed in the table

        # TODO: Find a way to avoid a full table scan just to get rule names
        for rule_name in self.table.rule_names():
            rule_alerts = self._get_alerts(rule_name)
            if not rule_alerts:
                continue

            merge_candidates = []
            for pending in rule_alerts:
                if pending.remaining_outputs:
                    # Outputs are still pending, so the alert must be sent immediately.
                    # For example, all alerts are sent to the default firehose now even if
                    # they will later be merged when sending to other outputs.
                    self._dispatch_alert(pending)
                    continue

                if pending.merge_enabled:
                    # Non-merged outputs are done; the alert is now a candidate for merging.
                    merge_candidates.append(pending)
                    continue

                # Sent successfully and not mergeable: the alert processor should have
                # deleted it already, but we can do it now.
                stale.append(pending)

            for merge_group in self._merge_groups(merge_candidates):
                # Collapse the group into one new Alert.
                combined = Alert.merge(merge_group.alerts)
                LOGGER.info('Merged %d alerts into a new alert with ID %s',
                            len(merge_group.alerts), combined.alert_id)
                new_merged.append(combined)

                # The originals have already been sent to the unmerged outputs
                # (e.g. default firehose), so they can be safely marked for deletion.
                stale.extend(merge_group.alerts)

        if new_merged:
            # Persist the merged alerts first, then hand each one to the alert processor.
            self.table.add_alerts(new_merged)
            for combined in new_merged:
                self._dispatch_alert(combined)

        if stale:
            keys = [(old.rule_name, old.alert_id) for old in stale]
            self.table.delete_alerts(keys)
コード例 #2
0
class AlertProcessor(object):
    """Send alerts to their configured outputs and record the results in the alerts table."""
    # Cached AlertProcessor so that later invocations in the same Lambda container can re-use it
    ALERT_PROCESSOR = None
    # Maximum number of attempts made by the backoff decorator on _update_table
    BACKOFF_MAX_TRIES = 5

    @classmethod
    def get_instance(cls):
        """Return the cached AlertProcessor, creating (and caching) one if none exists yet."""
        cached = cls.ALERT_PROCESSOR
        if not cached:
            cached = cls.ALERT_PROCESSOR = AlertProcessor()
        return cached

    def __init__(self):
        """Initialization logic that can be cached across invocations"""
        # Combine the user-specified outputs with the outputs which are always required
        user_outputs = load_config(include={'outputs.json'})['outputs']
        self.config = resources.merge_required_outputs(user_outputs, env['STREAMALERT_PREFIX'])

        self.alerts_table = AlertTable(env['ALERTS_TABLE'])

    def _create_dispatcher(self, output):
        """Build the dispatcher responsible for the given output.

        Args:
            output (str): Alert output, e.g. "aws-sns:topic-name"

        Returns:
            OutputDispatcher: Based on the output type.
                Returns None if the output is invalid or not defined in the config.
        """
        # A valid output is exactly "<service>:<descriptor>"
        parts = output.split(':')
        if len(parts) != 2:
            LOGGER.error('Improperly formatted output [%s]. Outputs for rules must '
                         'be declared with both a service and a descriptor for the '
                         'integration (ie: \'slack:my_channel\')', output)
            return None

        service, descriptor = parts
        if service in self.config and descriptor in self.config[service]:
            return StreamAlertOutput.create_dispatcher(service, self.config)

        LOGGER.error('The output \'%s\' does not exist!', output)
        return None

    def _send_to_outputs(self, alert):
        """Attempt delivery of an alert to every output it still has remaining.

        Args:
            alert (Alert): Alert to send. Its outputs_sent attribute is replaced with the
                set of outputs which succeeded during this call.

        Returns:
            dict: Maps output (str) to whether it sent successfully (bool)
        """
        outcome = {}

        for output in alert.remaining_outputs:
            dispatcher = self._create_dispatcher(output)
            if dispatcher:
                outcome[output] = dispatcher.dispatch(alert, output)
            else:
                # An output without a usable dispatcher counts as a failed send.
                outcome[output] = False

        # NOTE(review): this overwrites outputs_sent with only this call's successes;
        # confirm that outputs recorded by earlier attempts are not meant to be kept here.
        alert.outputs_sent = {name for name, succeeded in outcome.items() if succeeded}
        return outcome

    @backoff.on_exception(backoff.expo, ClientError,
                          max_tries=BACKOFF_MAX_TRIES, jitter=backoff.full_jitter,
                          on_backoff=backoff_handlers.backoff_handler(),
                          on_success=backoff_handlers.success_handler(),
                          on_giveup=backoff_handlers.giveup_handler())
    def _update_table(self, alert, output_results):
        """Record the outcome of the output attempts in the alerts table.

        Retried with exponential backoff when a ClientError is raised.

        Args:
            alert (Alert): Alert instance which was sent
            output_results (dict): Maps output (str) to whether it sent successfully (bool)
        """
        if not output_results:
            return

        statuses = list(output_results.values())

        if all(statuses) and not alert.merge_enabled:
            # Everything was delivered and no merge is pending - the alert can be removed now
            self.alerts_table.delete_alerts([(alert.rule_name, alert.alert_id)])
            return

        if any(statuses):
            # Partial success (or a merge is pending) - persist which outputs were sent
            self.alerts_table.update_sent_outputs(alert)
        # Otherwise every output failed and the table is left untouched

    def run(self, event):
        """Run the alert processor!

        Args:
            event (dict): Lambda invocation event containing at least the rule name and alert ID.

        Returns:
            dict: Maps output (str) to whether it sent successfully (bool).
                An empty dict is returned if the Alert was improperly formatted.
        """
        # By default the event itself is the full alert record.
        alert_record = event

        # An event holding only the table key means the record must be fetched from Dynamo.
        if set(event) == {'AlertID', 'RuleName'}:
            LOGGER.info('Retrieving %s from alerts table', event)
            alert_record = self.alerts_table.get_alert_record(event['RuleName'], event['AlertID'])
            if not alert_record:
                LOGGER.error('%s does not exist in the alerts table', event)
                return {}

        # Build the Alert instance from the record.
        try:
            alert = Alert.create_from_dynamo_record(alert_record)
        except AlertCreationError:
            LOGGER.exception('Invalid alert %s', event)
            return {}

        # Strip the normalization key from the record before sending.
        # TODO: Consider including this in at least some outputs, e.g. default Athena firehose
        if Normalizer.NORMALIZATION_KEY in alert.record:
            del alert.record[Normalizer.NORMALIZATION_KEY]

        outcome = self._send_to_outputs(alert)
        self._update_table(alert, outcome)
        return outcome