def __init__(self):
    """Load config and set up regexes, buckets, and the Athena client.

    Selects the alert/data filename regexes based on the configured data
    file format and prepares the state needed for partition management.

    Raises:
        ConfigError: If the configured file format is not "parquet" or "json"
    """
    config = load_config(include={'lambda.json', 'global.json'})
    prefix = config['global']['account']['prefix']
    athena_config = config['lambda']['athena_partitioner_config']

    self._file_format = get_data_file_format(config)

    # The S3 key layout differs per file format, so pick matching regexes
    if self._file_format == 'parquet':
        self._alerts_regex = self.ALERTS_REGEX_PARQUET
        self._data_regex = self.DATA_REGEX_PARQUET
    elif self._file_format == 'json':
        self._alerts_regex = self.ALERTS_REGEX
        self._data_regex = self.DATA_REGEX
    else:
        # FIX: corrected grammar in the error message ("file format are" ->
        # "file formats are")
        message = (
            'file format "{}" is not supported. Supported file formats are '
            '"parquet", "json". Please update the setting in athena_partitioner_config '
            'in "conf/lambda.json"'.format(self._file_format))
        raise ConfigError(message)

    self._athena_buckets = athena_partition_buckets(config)

    db_name = get_database_name(config)

    # Get the S3 bucket to store Athena query results
    results_bucket = athena_config.get(
        'results_bucket',
        's3://{}-streamalert-athena-results'.format(prefix))

    # Maps each S3 bucket to the set of keys pending partition creation
    self._s3_buckets_and_keys = defaultdict(set)

    self._create_client(db_name, results_bucket)
def get_athena_client(config):
    """Get an athena client using the current config settings

    Args:
        config (CLIConfig): Loaded StreamAlert config

    Returns:
        AthenaClient: instantiated client for performing athena actions
    """
    account = config['global']['account']
    athena_config = config['lambda']['athena_partitioner_config']

    # Fall back to the conventional results bucket when none is configured
    results_bucket = athena_config.get(
        'results_bucket',
        's3://{}-streamalert-athena-results'.format(account['prefix'])
    )

    return AthenaClient(
        get_database_name(config),
        results_bucket,
        'streamalert_cli',
        region=account['region']
    )
def _generate_global_module(config):
    """Build the variable map for the './modules/tf_globals' Terraform module.

    Args:
        config (dict): The loaded StreamAlert config

    Returns:
        dict: Terraform variables for the tf_globals module
    """
    infrastructure = config['global']['infrastructure']
    account = config['global']['account']

    # 2019-08-22 (Ryxias)
    # In version 3.0.0+, StreamAlert will default to always using the prefix, when "use_prefix"
    # is not present.
    #
    # Refer to this PR for more information: https://github.com/airbnb/streamalert/pull/979
    use_prefix = infrastructure.get('classifier_sqs', {}).get('use_prefix', True)

    global_module = {
        'source': './modules/tf_globals',
        'account_id': account['aws_account_id'],
        'region': account['region'],
        'prefix': account['prefix'],
        'kms_key_arn': '${aws_kms_key.server_side_encryption.arn}',
        'rules_engine_timeout': config['lambda']['rules_engine_config']['timeout'],
        'sqs_use_prefix': use_prefix,
        'alerts_db_name': get_database_name(config),
        'alerts_file_format': get_data_file_format(config),
        'alerts_schema': generate_alerts_table_schema()
    }

    # The settings below are applied only when explicitly defined, because
    # the ./modules/tf_globals module declares its own defaults. This allows
    # overriding while avoiding storing defaults in multiple locations.
    if 'alerts_table' in infrastructure:
        for setting in {'read_capacity', 'write_capacity'}:
            value = infrastructure['alerts_table'].get(setting)
            if value:
                global_module['alerts_table_{}'.format(setting)] = value

    alert_fh_settings_with_defaults = {
        'bucket_name',
        'buffer_size',
        'buffer_interval',
        'cloudwatch_log_retention',
    }

    if 'alerts_firehose' in infrastructure:
        for setting in alert_fh_settings_with_defaults:
            value = infrastructure['alerts_firehose'].get(setting)
            if not value:
                continue
            global_module['alerts_firehose_{}'.format(setting)] = value

    if 'rule_staging' in infrastructure:
        rule_staging = infrastructure['rule_staging']
        if rule_staging.get('enabled'):
            global_module['enable_rule_staging'] = True
        for setting in {'table_read_capacity', 'table_write_capacity'}:
            value = rule_staging.get(setting)
            if value:
                # Defaults are set for this in the terraform module, so skip
                global_module['rules_{}'.format(setting)] = value

    return global_module
def generate_artifact_extractor(config):
    """Generate Terraform for the Artifact Extractor Lambda function

    Args:
        config (dict): The loaded config from the 'conf/' directory

    Returns:
        dict: Artifact Extractor Terraform definition to be marshaled to JSON
    """
    # Nothing to generate when the feature is not enabled
    if not artifact_extractor_enabled(config):
        return

    ae_config = config['global']['infrastructure']['artifact_extractor']
    account = config['global']['account']

    result = infinitedict()

    # Set variables for the artifact extractor module
    result['module']['artifact_extractor'] = {
        'source': './modules/tf_artifact_extractor',
        'account_id': account['aws_account_id'],
        'prefix': account['prefix'],
        'region': account['region'],
        'glue_catalog_db_name': get_database_name(config),
        'glue_catalog_table_name': ae_config.get('table_name', DEFAULT_ARTIFACTS_TABLE_NAME),
        's3_bucket_name': firehose_data_bucket(config),
        'stream_name': FirehoseClient.artifacts_firehose_stream_name(config),
        'buffer_size': ae_config.get('firehose_buffer_size', 128),
        'buffer_interval': ae_config.get('firehose_buffer_interval', 900),
        'kms_key_arn': '${aws_kms_key.server_side_encryption.arn}',
        'schema': generate_artifacts_table_schema()
    }

    return result
def __init__(self):
    """Load config and initialize the rule table, Athena client, and stats."""
    self._config = load_config()
    prefix = self._config['global']['account']['prefix']
    athena_config = self._config['lambda']['athena_partitioner_config']

    # Create the rule table class for getting staging information
    self._rule_table = RuleTable('{}_streamalert_rules'.format(prefix))

    # Get the name of the athena database to access
    db_name = athena_config.get('database_name', get_database_name(self._config))

    # Get the S3 bucket to store Athena query results
    default_results_bucket = 's3://{}-streamalert-athena-results'.format(prefix)
    results_bucket = athena_config.get('results_bucket', default_results_bucket)

    self._athena_client = AthenaClient(db_name, results_bucket, self.ATHENA_S3_PREFIX)
    self._current_time = datetime.utcnow()

    # Per-rule staging statistics, populated later
    self._staging_stats = dict()
def generate_firehose(logging_bucket, main_dict, config):
    """Generate the Firehose Terraform modules

    Args:
        logging_bucket (str): The name of the global logging bucket
        main_dict (infinitedict): The Dict to marshal to a file
        config (CLIConfig): The loaded StreamAlert Config
    """
    if not config['global']['infrastructure'].get('firehose', {}).get('enabled'):
        return

    account = config['global']['account']
    prefix = account['prefix']

    # This can return False but the check above ensures that that should never happen
    firehose_s3_bucket_name = firehose_data_bucket(config)

    firehose_conf = config['global']['infrastructure']['firehose']

    # Firehose Setup module
    main_dict['module']['kinesis_firehose_setup'] = {
        'source': './modules/tf_kinesis_firehose_setup',
        'account_id': account['aws_account_id'],
        'prefix': prefix,
        'region': account['region'],
        's3_logging_bucket': logging_bucket,
        's3_bucket_name': firehose_s3_bucket_name,
        'kms_key_id': '${aws_kms_key.server_side_encryption.key_id}'
    }

    enabled_logs = FirehoseClient.load_enabled_log_sources(
        firehose_conf,
        config['logs'],
        force_load=True
    )

    log_alarms_config = firehose_conf.get('enabled_logs', {})
    db_name = get_database_name(config)
    firehose_prefix = prefix if firehose_conf.get('use_prefix', True) else ''

    # Add the Delivery Streams individually
    for log_stream_name, log_type_name in enabled_logs.items():
        module_dict = {
            'source': './modules/tf_kinesis_firehose_delivery_stream',
            'buffer_size': firehose_conf.get('buffer_size'),
            'buffer_interval': firehose_conf.get('buffer_interval', 300),
            'file_format': get_data_file_format(config),
            'stream_name': FirehoseClient.generate_firehose_name(
                firehose_prefix, log_stream_name),
            'role_arn': '${module.kinesis_firehose_setup.firehose_role_arn}',
            's3_bucket_name': firehose_s3_bucket_name,
            'kms_key_arn': '${aws_kms_key.server_side_encryption.arn}',
            'glue_catalog_db_name': db_name,
            'glue_catalog_table_name': log_stream_name,
            'schema': generate_data_table_schema(config, log_type_name)
        }

        # Try to get alarm info for this specific log type
        alarm_info = log_alarms_config.get(log_type_name)
        if not alarm_info and ':' in log_type_name:
            # Fallback on looking for alarm info for the parent log type
            alarm_info = log_alarms_config.get(log_type_name.split(':')[0])

        if alarm_info and alarm_info.get('enable_alarm'):
            module_dict['enable_alarm'] = True

            # There are defaults of these defined in the terraform module, so do
            # not set the variable values unless explicitly specified
            threshold = alarm_info.get('log_min_count_threshold')
            if threshold:
                module_dict['alarm_threshold'] = threshold

            for setting in ('evaluation_periods', 'period_seconds'):
                value = alarm_info.get(setting)
                if value:
                    module_dict[setting] = value

            alarm_actions = alarm_info.get('alarm_actions')
            if alarm_actions:
                # Normalize a single action into a list
                if isinstance(alarm_actions, list):
                    module_dict['alarm_actions'] = alarm_actions
                else:
                    module_dict['alarm_actions'] = [alarm_actions]
            else:
                module_dict['alarm_actions'] = [monitoring_topic_arn(config)]

        main_dict['module']['kinesis_firehose_{}'.format(log_stream_name)] = module_dict