Example #1
    def _load_rule_table(cls, config):
        """Load and return a RuleTable class for communicating with the DynamoDB rule table

        Args:
            config (dict): Loaded configuration from 'conf/' directory

        Returns:
            rule_table.RuleTable: Loaded frontend for DynamoDB rules table
        """
        # Ensure the rules table is enabled
        rt_config = config['global']['infrastructure']['rules_table']
        if not rt_config.get('enabled', False):
            return

        now = datetime.utcnow()
        refresh_delta = timedelta(
            minutes=rt_config.get('cache_refresh_minutes', 10))

        # The rule table will need to be refreshed if the refresh interval has been surpassed
        needs_refresh = cls._RULE_TABLE_LAST_REFRESH + refresh_delta < now

        if not needs_refresh:
            LOGGER.debug(
                'Rule table does not need to be refreshed (last refresh time: %s; '
                'current time: %s)', cls._RULE_TABLE_LAST_REFRESH, now)
            return

        LOGGER.info(
            'Refreshing rule table (last refresh time: %s; current time: %s)',
            cls._RULE_TABLE_LAST_REFRESH, now)

        table_name = '{}_streamalert_rules'.format(
            config['global']['account']['prefix'])
        cls._RULE_TABLE = RuleTable(table_name)
        cls._RULE_TABLE_LAST_REFRESH = now
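The method above relies on class-level cache attributes (_RULE_TABLE and _RULE_TABLE_LAST_REFRESH) that persist across warm Lambda invocations. A minimal sketch of the staleness check in isolation, using hypothetical timestamps and the default 10 minute interval:

from datetime import datetime, timedelta

# Hypothetical values standing in for the class-level cache attributes
last_refresh = datetime(2018, 1, 1, 12, 0, 0)   # cls._RULE_TABLE_LAST_REFRESH
refresh_delta = timedelta(minutes=10)           # rt_config 'cache_refresh_minutes'

now = datetime(2018, 1, 1, 12, 15, 0)
needs_refresh = last_refresh + refresh_delta < now
print(needs_refresh)  # True: 12:00 plus 10 minutes is earlier than 12:15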
Example #2
    def process(cls, input_payload):
        """Process rules on a record.

        Gather a list of rules based on the record's datasource type.
        For each rule, evaluate the record through all listed matchers
        and the rule itself to determine if a match occurs.

        Returns:
            list: alerts

            An alert is represented as a dictionary with the following keys:
                record: the parsed record that triggered the alert
                rule_name: the name of the triggered rule
                rule_description: the docstring of the rule function
                log_source, log_type: log information from the payload
                outputs: list of outputs to send the alert to
                source_service, source_entity: origin of the record
        """
        alerts = []
        payload = copy(input_payload)

        rules = [
            rule_attrs for rule_attrs in cls.__rules.values()
            if payload.log_source in rule_attrs.logs
        ]

        if not rules:
            LOGGER.debug('No rules to process for %s', payload)
            return alerts

        for record in payload.records:
            for rule in rules:
                # subkey check
                has_sub_keys = cls.process_subkeys(record, payload.type, rule)
                if not has_sub_keys:
                    continue

                # matcher check
                matcher_result = cls.match_event(record, rule)
                if not matcher_result:
                    continue

                # rule analysis
                rule_result = cls.process_rule(record, rule)
                if rule_result:
                    LOGGER.info(
                        'Rule [%s] triggered an alert on log type [%s] from entity \'%s\' '
                        'in service \'%s\'', rule.rule_name,
                        payload.log_source, payload.entity, payload.service())
                    alert = {
                        'record': record,
                        'rule_name': rule.rule_name,
                        'rule_description': rule.rule_function.__doc__
                        or DEFAULT_RULE_DESCRIPTION,
                        'log_source': str(payload.log_source),
                        'log_type': payload.type,
                        'outputs': rule.outputs,
                        'source_service': payload.service(),
                        'source_entity': payload.entity
                    }
                    alerts.append(alert)

        return alerts
Example #3
    def _validate_type_mapping(mapping_str):
        """Static method to extract normalized type and IOC type from qualified str

        Args:
            mapping_str (str): A qualified string has pattern 'normalized_type:ioc_type'

        Returns:
            A tuple(bool, str, str)
            bool: First return indicate if the string a qualifited string contains
                both normalized CEF type and IOC type.
            str: Second return is normalized type.
            str: Last return is IOC type.
        """
        normalized_type = None
        ioc_type = None

        splitted_str = mapping_str.split(':')
        if len(splitted_str) == 1:
            normalized_type = splitted_str[0]
        elif len(splitted_str) == 2:
            normalized_type = splitted_str[0]
            ioc_type = splitted_str[1].split('_')[-1]
        else:
            LOGGER.info('Key %s in conf/types.json is incorrect', mapping_str)
            return False, None, None

        if normalized_type and ioc_type:
            return True, normalized_type, ioc_type

        return False, normalized_type, None
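A short usage sketch of the parsing behaviour above, using hypothetical mapping strings (the real keys live in conf/types.json) and calling the helper as if it were module-level for brevity:

# 'normalized_type:ioc_type' -> qualified string; the 'ioc_' prefix is stripped
print(_validate_type_mapping('sourceAddress:ioc_ip'))  # (True, 'sourceAddress', 'ip')

# No IOC suffix -> not qualified, but the normalized type is still returned
print(_validate_type_mapping('userName'))              # (False, 'userName', None)

# More than one ':' is treated as a malformed key
print(_validate_type_mapping('a:b:c'))                 # (False, None, None)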
Example #4
    def rule_analysis(record, rule, payload, alerts):
        """Class method to analyze rule against a record

        Args:
            record (dict): A parsed log with data.
            rule: Rule attributes.
            payload: The StreamPayload object.
            alerts (list): A list of alerts which will be sent to alert processor.

        Returns:
            (dict): A list of alerts.
        """
        rule_result = StreamRules.process_rule(record, rule)
        if rule_result:
            if StreamRules.check_alerts_duplication(record, rule, alerts):
                return

            LOGGER.info(
                'Rule [%s] triggered an alert on log type [%s] from entity \'%s\' '
                'in service \'%s\'', rule.rule_name, payload.log_source,
                payload.entity, payload.service())
            alert = {
                'record': record,
                'rule_name': rule.rule_name,
                'rule_description': rule.rule_function.__doc__
                or DEFAULT_RULE_DESCRIPTION,
                'log_source': str(payload.log_source),
                'log_type': payload.type,
                'outputs': rule.outputs,
                'source_service': payload.service(),
                'source_entity': payload.entity,
                'context': rule.context
            }

            alerts.append(alert)
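StreamRules.check_alerts_duplication is referenced above but not shown in these examples. A minimal sketch of what such a duplicate check might look like, offered only as an assumption about its behaviour (skip a record that has already produced an alert for the same rule):

def check_alerts_duplication(record, rule, alerts):
    """Return True when an alert for this rule and record is already queued"""
    return any(alert['rule_name'] == rule.rule_name and alert['record'] == record
               for alert in alerts)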
Example #5
 def firehose_request_wrapper(data):
     """Firehose request wrapper to use with backoff"""
     LOGGER.info('[Firehose] Sending %d records to %s',
                 record_batch_size,
                 stream_name)
     return self._firehose_client.put_record_batch(
         DeliveryStreamName=stream_name,
         Records=data)
Example #6
 def _send_to_dynamo(self, alerts):
     """Write alerts in batches to Dynamo."""
     # The batch_writer() automatically handles buffering, batching, and retrying failed items
     with self.table.batch_writer() as batch:
         for alert in alerts:
             batch.put_item(Item=self.dynamo_record(alert))
     LOGGER.info('Successfully sent %d alert(s) to dynamo:%s', len(alerts),
                 self.table.table_name)
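The self.table used above is a boto3 DynamoDB Table resource; batch_writer() transparently groups the put_item calls into BatchWriteItem requests and retries unprocessed items. A minimal sketch of the setup this method assumes, with a placeholder table name and a purely hypothetical dynamo_record conversion:

import boto3

class AlertForwarder(object):  # hypothetical home for _send_to_dynamo
    def __init__(self, table_name='example_streamalert_alerts'):
        # Table() is lazy; no request is made until the batch writer flushes
        self.table = boto3.resource('dynamodb').Table(table_name)

    @staticmethod
    def dynamo_record(alert):
        """Placeholder: map an alert dict to a DynamoDB item"""
        return dict(alert)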
Example #7
    def _download_object(self, region, bucket, key):
        """Download an object from S3.

        Verifies the S3 object is less than or equal to 128MB and
        downloads it into a temp file. Lambda can only execute for a
        maximum of 300 seconds, and the size of the file to download
        greatly impacts that time.

        Args:
            region (str): AWS region to use for boto client instance.
            bucket (str): S3 bucket to download object from.
            key (str): Key of s3 object.

        Returns:
            str: Path of the downloaded S3 object, or None if the object is empty.
        """
        size_kb = self.s3_object_size / 1024.0
        size_mb = size_kb / 1024.0
        display_size = '{}MB'.format(size_mb) if size_mb else '{}KB'.format(
            size_kb)

        # File size checks before downloading
        if size_kb == 0:
            return
        elif size_mb > 128:
            raise S3ObjectSizeError(
                '[S3Payload] The S3 object {}/{} is too large [{}] to download '
                'from S3'.format(bucket, key, display_size))

        # Bandit warns about using a shell process, ignore with #nosec
        LOGGER.debug(os.popen('df -h /tmp | tail -1').read().strip())  # nosec
        LOGGER.info('[S3Payload] Starting download from S3: %s/%s [%s]',
                    bucket, key, display_size)

        # Convert the S3 object name to store as a file in the Lambda container
        suffix = key.replace('/', '-')
        file_descriptor, downloaded_s3_object = tempfile.mkstemp(suffix=suffix)

        with open(downloaded_s3_object, 'wb') as data:
            client = boto3.client('s3', region_name=region)
            start_time = time.time()
            client.download_fileobj(bucket, key, data)

        # Explicitly call os.close on the underlying open file descriptor
        # Addresses https://github.com/airbnb/streamalert/issues/587
        os.close(file_descriptor)

        total_time = time.time() - start_time
        LOGGER.info('Completed download in %s seconds', round(total_time, 2))

        # Log a metric on how long this object took to download
        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.S3_DOWNLOAD_TIME,
                                total_time)

        return downloaded_s3_object
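S3ObjectSizeError is raised above but not defined in this example; presumably it is a simple exception subclass along these lines (shown here only as an assumption):

class S3ObjectSizeError(Exception):
    """Raised when an S3 object is too large to download within Lambda's limits"""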
Example #8
    def sink(self, alerts):
        """Sink triggered alerts from the StreamRules engine.

        Args:
            alerts (list): a list of dictionaries representing JSON alerts

        Sends a message to the alert processor with the following JSON format:
            {
                "record": record,
                "metadata": {
                    "rule_name": rule.rule_name,
                    "rule_description": rule.rule_function.__doc__,
                    "log": str(payload.log_source),
                    "outputs": rule.outputs,
                    "type": payload.type,
                    "source": {
                        "service": payload.service,
                        "entity": payload.entity
                    }
                }
            }
        """
        for alert in alerts:
            try:
                data = json.dumps(alert, default=lambda o: o.__dict__)
            except AttributeError as err:
                LOGGER.error(
                    'An error occurred while dumping alert to JSON: %s '
                    'Alert: %s', err.message, alert)
                continue

            try:
                response = self.client_lambda.invoke(
                    FunctionName=self.function,
                    InvocationType='Event',
                    Payload=data,
                    Qualifier='production')

            except ClientError as err:
                LOGGER.exception(
                    'An error occurred while sending alert to '
                    '\'%s:production\'. Error is: %s. Alert: %s',
                    self.function, err.response, data)
                continue

            if response['ResponseMetadata']['HTTPStatusCode'] != 202:
                LOGGER.error('Failed to send alert to \'%s\': %s',
                             self.function, data)
                continue

            if self.env['lambda_alias'] != 'development':
                LOGGER.info(
                    'Sent alert to \'%s\' with Lambda request ID \'%s\'',
                    self.function, response['ResponseMetadata']['RequestId'])
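A self-contained sketch of the asynchronous invoke call this method relies on; the client construction, function name, and payload are placeholders, but the parameters mirror the call above:

import json

import boto3

client_lambda = boto3.client('lambda', region_name='us-east-1')  # region is an assumption

response = client_lambda.invoke(
    FunctionName='example_streamalert_alert_processor',  # hypothetical function name
    InvocationType='Event',  # asynchronous invoke; Lambda returns HTTP 202 on acceptance
    Payload=json.dumps({'example': 'alert'}),
    Qualifier='production')  # target the Lambda alias named 'production'

assert response['ResponseMetadata']['HTTPStatusCode'] == 202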
Example #9
    def _firehose_request_helper(self, stream_name, record_batch):
        """Send record batches to Firehose

        Args:
            stream_name (str): The name of the Delivery Stream to send to
            record_batch (list): The records to send
        """
        record_batch_size = len(record_batch)
        resp = {}

        try:
            LOGGER.debug('Sending %d records to Firehose:%s',
                         record_batch_size,
                         stream_name)
            resp = self.firehose_client.put_record_batch(
                DeliveryStreamName=stream_name,
                # The newline at the end is required by Firehose,
                # otherwise all records will be on a single line and
                # unsearchable in Athena.
                Records=[{'Data': json.dumps(self.sanitize_keys(record),
                                             separators=(",", ":")) + '\n'}
                         for record
                         in record_batch])
        except ClientError as firehose_err:
            LOGGER.error(firehose_err)
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_FAILED_RECORDS,
                                    record_batch_size)
            return

        # Handle errors if failures occurred in PutRecordBatch
        # TODO(jack) implement backoff here for additional message reliability
        if resp.get('FailedPutCount') > 0:
            failed_records = [failed
                              for failed
                              in resp['RequestResponses']
                              if failed.get('ErrorCode')]
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_FAILED_RECORDS,
                                    resp['FailedPutCount'])
            # Only print the first 100 failed records to Cloudwatch logs
            LOGGER.error('The following records failed to put to the '
                         'Delivery Stream %s: %s',
                         stream_name,
                         json.dumps(failed_records[:100], indent=2))
        else:
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_RECORDS_SENT,
                                    record_batch_size)
            LOGGER.info('Successfully sent %d messages to Firehose:%s',
                        record_batch_size,
                        stream_name)
Example #10
 def firehose_request_wrapper():
     """Firehose request wrapper to use with backoff"""
     LOGGER.info('[Firehose] Sending %d records to %s',
                 record_batch_size,
                 stream_name)
     return self._firehose_client.put_record_batch(
         DeliveryStreamName=stream_name,
         # The newline at the end is required by Firehose,
         # otherwise all records will be on a single line and
         # unsearchable in Athena.
         Records=[{'Data': json.dumps(self.sanitize_keys(record),
                                      separators=(",", ":")) + '\n'}
                  for record
                  in record_batch])
Example #11
        def firehose_request_wrapper(data):
            """Firehose request wrapper to use with backoff"""
            # Use the current length of data here so we can track failed records that are retried
            LOGGER.info('[Firehose] Sending %d records to %s', len(data), stream_name)

            response = self._client.put_record_batch(DeliveryStreamName=stream_name, Records=data)

            # Log this as an error for now so it can be picked up in logs
            if response['FailedPutCount'] > 0:
                LOGGER.error('Received non-zero FailedPutCount: %d', response['FailedPutCount'])
                # Strip out the successful records so only the failed ones are retried. This
                # mutates the list of dictionary objects in place, so the retried call sees
                # the updated list
                self._strip_successful_records(data, response)

            return response
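The wrapper calls self._strip_successful_records, which is not shown in this example. One way such an in-place strip could work, assuming the standard PutRecordBatch response shape (failed entries carry an ErrorCode, successful ones do not):

def _strip_successful_records(data, response):
    """Remove accepted records from data (in place) so only failures are retried"""
    success_indices = [
        idx for idx, result in enumerate(response['RequestResponses'])
        if not result.get('ErrorCode')
    ]
    # Delete from the end so earlier indexes remain valid
    for idx in sorted(success_indices, reverse=True):
        del data[idx]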
Example #12
    def send_alerts(self, alerts):
        """Send alerts to the Dynamo table.

        Args:
            alerts (list): A list of Alert instances to save to Dynamo.
        """
        try:
            self._table.add_alerts(alerts)
            LOGGER.info('Successfully sent %d alert(s) to dynamo:%s',
                        len(alerts), self._table.name)
        except ClientError:
            # add_alerts() automatically retries transient errors - any raised ClientError
            # is likely unrecoverable. Log an exception and metric
            LOGGER.exception('Error saving alerts to Dynamo')
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FAILED_DYNAMO_WRITES, 1)
Example #13
    def rule_analysis(self, record, rule, payload, alerts):
        """Analyze a rule against the record, adding a new alert if applicable.

        Args:
            record (dict): A parsed log with data.
            rule (RuleAttributes): Attributes for the rule which triggered the alert.
            payload (StreamPayload): Payload with information about the source of the record.
            alerts (list): The current list of Alert instances.
                If the rule returns True on the record, a new Alert instance is added to this list.
        """
        rule_result = rule.process(record)
        if not rule_result:
            return

        # When threat intel is enabled, normalized records are re-analyzed by
        # all rules, so we need to check for duplicate alerts.
        if self._threat_intel and self.check_alerts_duplication(
                record, rule, alerts):
            return

        # Check if the rule is staged and, if so, only use the required alert outputs
        if rule.is_staged(self._RULE_TABLE):
            all_outputs = self._required_outputs_set
        else:  # Otherwise, combine the required alert outputs with the ones for this rule
            all_outputs = self._required_outputs_set.union(rule.outputs_set)

        alert = Alert(rule.name,
                      record,
                      all_outputs,
                      cluster=os.environ['CLUSTER'],
                      context=rule.context,
                      log_source=str(payload.log_source),
                      log_type=payload.type,
                      merge_by_keys=rule.merge_by_keys,
                      merge_window=timedelta(minutes=rule.merge_window_mins),
                      rule_description=rule.description,
                      source_entity=payload.entity,
                      source_service=payload.service(),
                      staged=rule.is_staged(self._RULE_TABLE))

        LOGGER.info(
            'Rule [%s] triggered alert [%s] on log type [%s] from entity \'%s\' '
            'in service \'%s\'', rule.name, alert.alert_id, payload.log_source,
            payload.entity, payload.service())

        alerts.append(alert)
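A tiny illustration of the output selection above, using hypothetical output names in place of the real configuration:

required_outputs = {'aws-firehose:alerts'}  # placeholder for self._required_outputs_set
rule_outputs = {'slack:security'}           # placeholder for rule.outputs_set

# Staged rules only publish to the required outputs; otherwise both sets are combined
for staged in (True, False):
    all_outputs = required_outputs if staged else required_outputs.union(rule_outputs)
    print(staged, sorted(all_outputs))
# True ['aws-firehose:alerts']
# False ['aws-firehose:alerts', 'slack:security']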
Example #14
    def _download_object(self, region, bucket, key):
        """Download an object from S3.

        Verifies the S3 object is less than or equal to 128MB and
        downloads it into a temp file. Lambda can only execute for a
        maximum of 300 seconds, and the size of the file to download
        greatly impacts that time.

        Args:
            region (str): AWS region to use for boto client instance.
            bucket (str): S3 bucket to download object from.
            key (str): Key of s3 object.

        Returns:
            str: The downloaded path of the S3 object.
        """
        size_kb = self.s3_object_size / 1024.0
        size_mb = size_kb / 1024.0
        if size_mb > 128:
            raise S3ObjectSizeError('S3 object to download is above 128MB')

        # Bandit warns about using a shell process, ignore with #nosec
        LOGGER.debug(os.popen('df -h /tmp | tail -1').read().strip())  # nosec

        display_size = '{}MB'.format(size_mb) if size_mb else '{}KB'.format(
            size_kb)

        LOGGER.info('Starting download from S3: %s/%s [%s]', bucket, key,
                    display_size)

        suffix = key.replace('/', '-')
        _, downloaded_s3_object = tempfile.mkstemp(suffix=suffix)
        with open(downloaded_s3_object, 'wb') as data:
            client = boto3.client('s3', region_name=region)
            start_time = time.time()
            client.download_fileobj(bucket, key, data)

        total_time = time.time() - start_time
        LOGGER.info('Completed download in %s seconds', round(total_time, 2))

        # Log a metric on how long this object took to download
        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.S3_DOWNLOAD_TIME,
                                total_time)

        return downloaded_s3_object
Example #15
    def rule_analysis(self, record, rule, payload, alerts):
        """Class method to analyze rule against a record

        Args:
            record (dict): A parsed log with data.
            rule: Rule attributes.
            payload: The StreamPayload object.
            alerts (list): A list of alerts which will be sent to alert processor.

        Returns:
            dict: A list of alerts.
        """
        rule_result = StreamRules.process_rule(record, rule)
        if rule_result:
            if StreamRules.check_alerts_duplication(record, rule, alerts):
                return

            alert_id = str(uuid.uuid4())  # Random unique alert ID
            LOGGER.info(
                'Rule [%s] triggered alert [%s] on log type [%s] from entity \'%s\' '
                'in service \'%s\'', rule.rule_name, alert_id,
                payload.log_source, payload.entity, payload.service())

            # Combine the required alert outputs with the ones for this rule
            all_outputs = self._required_outputs_set.union(
                set(rule.outputs or []))

            alert = {
                'id': alert_id,
                'record': record,
                'rule_name': rule.rule_name,
                'rule_description': rule.rule_function.__doc__
                or DEFAULT_RULE_DESCRIPTION,
                'log_source': str(payload.log_source),
                'log_type': payload.type,
                'outputs':
                list(all_outputs),  # TODO: @austinbyers - change this to a set
                'source_service': payload.service(),
                'source_entity': payload.entity,
                'context': rule.context
            }

            alerts.append(alert)
Example #16
    def _firehose_request_helper(self, stream_name, record_batch):
        """Send record batches to Firehose

        Args:
            stream_name (str): The name of the Delivery Stream to send to
            record_batch (list): The records to send
        """
        resp = {}
        record_batch_size = len(record_batch)
        exceptions_to_backoff = (ClientError, ConnectionError)

        @backoff.on_predicate(backoff.fibo,
                              lambda resp: resp['FailedPutCount'] > 0,
                              max_tries=self.MAX_BACKOFF_ATTEMPTS,
                              max_value=self.MAX_BACKOFF_FIBO_VALUE,
                              jitter=backoff.full_jitter,
                              on_backoff=backoff_handler,
                              on_success=success_handler,
                              on_giveup=giveup_handler)
        @backoff.on_exception(backoff.fibo,
                              exceptions_to_backoff,
                              max_tries=self.MAX_BACKOFF_ATTEMPTS,
                              jitter=backoff.full_jitter,
                              on_backoff=backoff_handler,
                              on_success=success_handler,
                              on_giveup=giveup_handler)
        def firehose_request_wrapper(data):
            """Firehose request wrapper to use with backoff"""
            LOGGER.info('[Firehose] Sending %d records to %s',
                        record_batch_size,
                        stream_name)
            return self._firehose_client.put_record_batch(
                DeliveryStreamName=stream_name,
                Records=data)

        # The newline at the end is required by Firehose,
        # otherwise all records will be on a single line and
        # unsearchable in Athena.
        records_data = [
            {'Data': json.dumps(self.sanitize_keys(record), separators=(",", ":")) + '\n'}
            for record in record_batch
        ]

        # The try/except here is to catch the raised error at the
        # end of the backoff.
        try:
            resp = firehose_request_wrapper(records_data)
        except exceptions_to_backoff as firehose_err:
            LOGGER.error(firehose_err)
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_FAILED_RECORDS,
                                    record_batch_size)
            return

        # Error handle if failures occurred in PutRecordBatch after
        # several backoff attempts
        if resp.get('FailedPutCount') > 0:
            failed_records = [failed
                              for failed
                              in resp['RequestResponses']
                              if failed.get('ErrorCode')]
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_FAILED_RECORDS,
                                    resp['FailedPutCount'])
            # Only print the first 100 failed records to Cloudwatch logs
            LOGGER.error('[Firehose] The following records failed to put to '
                         'the Delivery Stream %s: %s',
                         stream_name,
                         json.dumps(failed_records[:100], indent=2))
        else:
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_RECORDS_SENT,
                                    record_batch_size)
            LOGGER.info('[Firehose] Successfully sent %d messages to %s with RequestId [%s]',
                        record_batch_size,
                        stream_name,
                        resp.get('ResponseMetadata', {}).get('RequestId', ''))
Example #17
    def run(self, event):
        """StreamAlert Lambda function handler.

        Loads the configuration for the StreamAlert function which contains
        available data sources, log schemas, normalized types, and outputs.
        Classifies logs sent into a parsed type.
        Matches records against rules.

        Args:
            event (dict): An AWS event mapped to a specific source/entity
                containing data read by Lambda.

        Returns:
            bool: True if all logs being parsed match a schema
        """
        records = event.get('Records', [])
        LOGGER.debug('Number of incoming records: %d', len(records))
        if not records:
            return False

        firehose_config = self.config['global'].get('infrastructure',
                                                    {}).get('firehose', {})
        if firehose_config.get('enabled'):
            self._firehose_client = StreamAlertFirehose(
                self.env['lambda_region'], firehose_config,
                self.config['logs'])

        payload_with_normalized_records = []
        for raw_record in records:
            # Get the service and entity from the payload. If the service/entity
            # is not in our config, log an error and move on to the next record
            service, entity = self.classifier.extract_service_and_entity(
                raw_record)
            if not service:
                LOGGER.error(
                    'No valid service found in payload\'s raw record. Skipping '
                    'record: %s', raw_record)
                continue

            if not entity:
                LOGGER.error(
                    'Unable to extract entity from payload\'s raw record for service %s. '
                    'Skipping record: %s', service, raw_record)
                continue

            # Cache the log sources for this service and entity on the classifier
            if not self.classifier.load_sources(service, entity):
                continue

            # Create the StreamPayload to use for encapsulating parsed info
            payload = load_stream_payload(service, entity, raw_record)
            if not payload:
                continue

            payload_with_normalized_records.extend(
                self._process_alerts(payload))

        LOGGER.info('Got %d normalized records',
                    len(payload_with_normalized_records))
        # Apply Threat Intel to normalized records at the end of the Rule Processor invocation
        record_alerts = self._rule_engine.threat_intel_match(
            payload_with_normalized_records)
        self._alerts.extend(record_alerts)
        if record_alerts and self.enable_alert_processor:
            self.sinker.sink(record_alerts)

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TOTAL_RECORDS,
                                self._processed_record_count)

        MetricLogger.log_metric(FUNCTION_NAME,
                                MetricLogger.TOTAL_PROCESSED_SIZE,
                                self._processed_size)

        LOGGER.debug('Invalid record count: %d', self._failed_record_count)

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FAILED_PARSES,
                                self._failed_record_count)

        LOGGER.debug('%s alerts triggered', len(self._alerts))

        MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.TRIGGERED_ALERTS,
                                len(self._alerts))

        # Check if debug logging is enabled before JSON-dumping alerts, since
        # this can be time consuming if there are a lot of alerts
        if self._alerts and LOGGER.isEnabledFor(LOG_LEVEL_DEBUG):
            LOGGER.debug('Alerts:\n%s', json.dumps(self._alerts, indent=2))

        if self._firehose_client:
            self._firehose_client.send()

        return self._failed_record_count == 0
Example #18
    def _firehose_request_helper(self, stream_name, record_batch):
        """Send record batches to Firehose

        Args:
            stream_name (str): The name of the Delivery Stream to send to
            record_batch (list): The records to send
        """
        exceptions_to_backoff = (ClientError, ConnectionError, Timeout)

        @backoff.on_predicate(backoff.fibo,
                              lambda resp: resp['FailedPutCount'] > 0,
                              max_tries=self.MAX_BACKOFF_ATTEMPTS,
                              max_value=self.MAX_BACKOFF_FIBO_VALUE,
                              jitter=backoff.full_jitter,
                              on_backoff=backoff_handler(debug_only=False),
                              on_success=success_handler(),
                              on_giveup=giveup_handler())
        @backoff.on_exception(backoff.fibo,
                              exceptions_to_backoff,
                              max_tries=self.MAX_BACKOFF_ATTEMPTS,
                              jitter=backoff.full_jitter,
                              on_backoff=backoff_handler(debug_only=False),
                              on_success=success_handler(),
                              on_giveup=giveup_handler())
        def firehose_request_wrapper(data):
            """Firehose request wrapper to use with backoff"""
            # Use the current length of data here so we can track failed records that are retried
            LOGGER.info('[Firehose] Sending %d records to %s', len(data), stream_name)

            response = self._client.put_record_batch(DeliveryStreamName=stream_name, Records=data)

            # Log this as an error for now so it can be picked up in logs
            if response['FailedPutCount'] > 0:
                LOGGER.error('Received non-zero FailedPutCount: %d', response['FailedPutCount'])
                # Strip out the successful records so only the failed ones are retried. This
                # mutates the list of dictionary objects in place, so the retried call sees
                # the updated list
                self._strip_successful_records(data, response)

            return response

        original_batch_size = len(record_batch)

        # The newline at the end is required by Firehose,
        # otherwise all records will be on a single line and
        # unsearchable in Athena.
        records_data = [
            {'Data': json.dumps(self.sanitize_keys(record), separators=(",", ":")) + '\n'}
            for record in record_batch
        ]

        # The try/except here is to catch the raised error at the end of the backoff
        try:
            resp = firehose_request_wrapper(records_data)
        except exceptions_to_backoff as firehose_err:
            LOGGER.error(firehose_err)
            # Use the current length of the records_data in case some records were
            # successful but others were not
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_FAILED_RECORDS,
                                    len(records_data))
            return

        # Error handle if failures occurred in PutRecordBatch after
        # several backoff attempts
        if resp.get('FailedPutCount') > 0:
            failed_records = [failed
                              for failed
                              in resp['RequestResponses']
                              if failed.get('ErrorCode')]
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_FAILED_RECORDS,
                                    resp['FailedPutCount'])
            # Only print the first 100 failed records to Cloudwatch logs
            LOGGER.error('[Firehose] The following records failed to put to '
                         'the Delivery Stream %s: %s',
                         stream_name,
                         json.dumps(failed_records[:100], indent=2))
        else:
            MetricLogger.log_metric(FUNCTION_NAME,
                                    MetricLogger.FIREHOSE_RECORDS_SENT,
                                    original_batch_size)
            LOGGER.info('[Firehose] Successfully sent %d messages to %s with RequestId [%s]',
                        original_batch_size,
                        stream_name,
                        resp.get('ResponseMetadata', {}).get('RequestId', ''))
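The on_backoff/on_success/on_giveup arguments above come from handler factories (backoff_handler(debug_only=False) and friends), whereas Example #16 passes plain handler functions. The callbacks receive a details dict with keys such as 'target', 'wait', and 'tries', as shown in Example #19. A minimal sketch of such a factory, offered only as an assumption about its interface:

import logging

LOGGER = logging.getLogger(__name__)  # stand-in for the module's LOGGER

def backoff_handler(debug_only=True):
    """Return an on_backoff callback suitable for the backoff decorators"""
    def _handler(details):
        log = LOGGER.debug if debug_only else LOGGER.info
        log('[Backoff]: Calling \'%s\' again in %f seconds with %d tries so far',
            details['target'].__name__, details['wait'], details['tries'])
    return _handler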
Example #19
 def _backoff_handler_firehose_reset(self, details):
     """Custom backoff handler to re-instantiate the Firehose Client"""
     LOGGER.info(
         '[Backoff]: Calling \'%s\' again in %f seconds with %d tries so far',
         details['target'].__name__, details['wait'], details['tries'])
     self._reset_firehose_client()
Example #20
    def _send_to_firehose(self):
        """Send all classified records to a respective Firehose Delivery Stream"""
        def _chunk(record_list, chunk_size):
            """Helper function to chunk payloads"""
            for item in range(0, len(record_list), chunk_size):
                yield record_list[item:item + chunk_size]

        def _check_record_batch(batch):
            """Helper function to verify record size"""
            # Iterate over a copy so oversized records can be removed from the batch in place
            for record in list(batch):
                if len(str(record)) > MAX_RECORD_SIZE:
                    # Show the first 1k bytes in order to not overload
                    # CloudWatch logs
                    LOGGER.error('The following record is too large to '
                                 'be sent to Firehose: %s', str(record)[:1000])
                    MetricLogger.log_metric(FUNCTION_NAME,
                                            MetricLogger.FIREHOSE_FAILED_RECORDS,
                                            1)
                    batch.remove(record)

        delivery_stream_name_pattern = 'streamalert_data_{}'

        # Iterate through each payload type
        for log_type, records in self.categorized_payloads.items():
            # This same method is used when naming the Delivery Streams
            formatted_log_type = log_type.replace(':', '_')

            for record_batch in _chunk(records, MAX_BATCH_SIZE):
                stream_name = delivery_stream_name_pattern.format(formatted_log_type)
                _check_record_batch(record_batch)

                resp = self.firehose_client.put_record_batch(
                    DeliveryStreamName=stream_name,
                    # The newline at the end is required by Firehose,
                    # otherwise all records will be on a single line and
                    # unsearchable in Athena.
                    Records=[{'Data': json.dumps(record, separators=(",", ":")) + '\n'}
                             for record
                             in record_batch])

                # Handle errors if failures occurred
                # TODO(jack) implement backoff here once the rule processor is split
                if resp.get('FailedPutCount') > 0:
                    failed_records = [failed
                                      for failed
                                      in resp['RequestResponses']
                                      if failed.get('ErrorCode')]
                    MetricLogger.log_metric(FUNCTION_NAME,
                                            MetricLogger.FIREHOSE_FAILED_RECORDS,
                                            resp['FailedPutCount'])
                    # Only print the first 100 failed records
                    LOGGER.error('The following records failed to put to the '
                                 'Delivery Stream %s: %s',
                                 stream_name,
                                 json.dumps(failed_records[:100], indent=2))
                else:
                    MetricLogger.log_metric(FUNCTION_NAME,
                                            MetricLogger.FIREHOSE_RECORDS_SENT,
                                            len(record_batch))
                    LOGGER.info('Successfully sent %d messages to Firehose:%s',
                                len(record_batch),
                                stream_name)
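A quick usage sketch of the _chunk generator defined above (repeated here as a standalone function), chunking a toy list into batches of four and showing the final partial batch:

def _chunk(record_list, chunk_size):
    """Helper function to chunk payloads"""
    for item in range(0, len(record_list), chunk_size):
        yield record_list[item:item + chunk_size]

print(list(_chunk(list(range(10)), 4)))
# [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]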