def handler(cls, options, config):
    """Initialize infrastructure using Terraform

    Args:
        options (argparse.Namespace): Parsed CLI options, including the 'backend' flag
        config (CLIConfig): Loaded StreamAlert config

    Returns:
        bool: False if errors occurred, True otherwise
    """
    # Stop here if only initializing the backend
    if options.backend:
        return cls._terraform_init_backend(config)

    LOGGER.info('Initializing StreamAlert')

    # generate init Terraform files
    if not terraform_generate_handler(config=config, init=True):
        return False

    LOGGER.info('Initializing Terraform')
    if not run_command(['terraform', 'init']):
        return False

    # build init infrastructure
    LOGGER.info('Building initial infrastructure')
    init_targets = [
        'aws_s3_bucket.lambda_source',
        'aws_s3_bucket.logging_bucket',
        'aws_s3_bucket.streamalert_secrets',
        'aws_s3_bucket.terraform_remote_state',
        'aws_s3_bucket.streamalerts',
        'aws_kms_key.server_side_encryption',
        'aws_kms_alias.server_side_encryption',
        'aws_kms_key.streamalert_secrets',
        'aws_kms_alias.streamalert_secrets',
        'aws_dynamodb_table.terraform_remote_state_lock'
    ]

    if not tf_runner(targets=init_targets):
        LOGGER.error('An error occurred while running StreamAlert init')
        return False

    # generate the main.tf with remote state enabled
    LOGGER.info('Configuring Terraform Remote State')
    if not terraform_generate_handler(config=config, check_tf=False, check_creds=False):
        return False

    if not run_command(['terraform', 'init']):
        return False

    LOGGER.info('Deploying Lambda Functions')

    functions = ['rule', 'alert', 'alert_merger', 'athena', 'classifier']
    deploy(functions, config)

    # we need to manually create the streamalerts table since terraform does not support this
    # See: https://github.com/terraform-providers/terraform-provider-aws/issues/1486
    alerts_bucket = firehose_alerts_bucket(config)
    create_table('alerts', alerts_bucket, config)

    LOGGER.info('Building remaining infrastructure')
    return tf_runner(refresh=False)
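# A minimal sketch of what a targeted run like tf_runner(targets=init_targets)
# presumably reduces to: each target becomes a repeated `-target` flag on the
# underlying `terraform apply` invocation. The real tf_runner lives elsewhere
# in the CLI; this hypothetical helper is for illustration only.
def _targeted_apply_sketch(targets):
    """Run `terraform apply` scoped to the given resource addresses"""
    command = ['terraform', 'apply', '-auto-approve']
    command.extend('-target={}'.format(target) for target in targets)
    return run_command(command)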
def generate_rule_promotion(config):
    """Generate Terraform for the Rule Promotion function

    Args:
        config (dict): The loaded config from the 'conf/' directory

    Returns:
        dict: Rule Promotion dict to be marshaled to JSON, or False if
            rule staging is disabled
    """
    # The Rule Promotion Lambda function depends on the rule staging feature
    # being enabled, so do not generate the code for this Lambda function if
    # it is not enabled
    if not config['global']['infrastructure']['rule_staging'].get('enabled', False):
        return False

    result = infinitedict()

    alerts_bucket = firehose_alerts_bucket(config)

    # Set variables for the IAM permissions module
    result['module']['rule_promotion_iam'] = {
        'source': './modules/tf_rule_promotion_iam',
        'send_digest_schedule_expression':
            config['lambda']['rule_promotion_config']['send_digest_schedule_expression'],
        'digest_sns_topic': StatsPublisher.formatted_sns_topic_arn(config).split(':')[-1],
        'role_id': '${module.rule_promotion_lambda.role_id}',
        'rules_table_arn': '${module.globals.rules_table_arn}',
        'function_alias_arn': '${module.rule_promotion_lambda.function_alias_arn}',
        'function_name': '${module.rule_promotion_lambda.function_name}',
        'athena_results_bucket_arn': '${module.streamalert_athena.results_bucket_arn}',
        'alerts_bucket': alerts_bucket,
        's3_kms_key_arn': '${aws_kms_key.server_side_encryption.arn}'
    }

    # Set variables for the Lambda module
    result['module']['rule_promotion_lambda'] = generate_lambda(
        '{}_streamalert_{}'.format(config['global']['account']['prefix'], RULE_PROMOTION_NAME),
        RulePromotionPackage.package_name + '.zip',
        RulePromotionPackage.lambda_handler,
        config['lambda']['rule_promotion_config'],
        config)

    return result
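# For illustration: the dict returned above is ultimately marshaled to JSON as
# one of the *.tf.json files Terraform consumes. A hedged sketch of that step,
# assuming json is imported at module scope and using a hypothetical output
# path; the real CLI writes these files during `terraform generate`.
def _write_rule_promotion_tf_sketch(config, path='terraform/rule_promotion.tf.json'):
    tf_dict = generate_rule_promotion(config)
    if not tf_dict:  # rule staging is disabled, so there is nothing to write
        return False
    with open(path, 'w') as tf_file:
        json.dump(tf_dict, tf_file, indent=2, sort_keys=True)
    return True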
def buckets_from_config(cls, config):
    """Get the buckets from default buckets and additionally configured ones

    Args:
        config (dict): The loaded config from the 'conf/' directory

    Returns:
        dict: Mapping of bucket name to data type ('alerts' or 'data')
            for which Athena is enabled
    """
    athena_config = config['lambda']['athena_partition_refresh_config']
    data_buckets = athena_config.get('buckets', {})
    data_buckets[firehose_alerts_bucket(config)] = 'alerts'

    # Data retention is optional, so check for this
    data_bucket = firehose_data_bucket(config)
    if data_bucket:
        data_buckets[data_bucket] = 'data'

    return data_buckets
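# Illustrative only: with a prefix of 'acme', one user-configured bucket in the
# athena config, and data retention enabled, buckets_from_config would return a
# mapping along these lines (bucket names are made up):
#
#   {
#       'custom-data-bucket': 'data',       # from the 'buckets' config entry
#       'acme-streamalerts': 'alerts',      # default alerts bucket
#       'acme-streamalert-data': 'data',    # default data bucket
#   }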
def generate_main(config, init=False):
    """Generate the main.tf.json Terraform dict

    Args:
        config (CLIConfig): The loaded CLI config
        init (bool): Terraform is running in the init phase or not (optional)

    Returns:
        dict: main.tf.json Terraform dict
    """
    write_vars(config, region=config['global']['account']['region'])

    main_dict = infinitedict()

    logging_bucket, create_logging_bucket = s3_access_logging_bucket(config)

    state_lock_table_name = '{}_streamalert_terraform_state_lock'.format(
        config['global']['account']['prefix']
    )

    # Setup the Backend depending on the deployment phase.
    # When first setting up StreamAlert, the Terraform statefile
    # is stored locally. After the first dependencies are created,
    # this moves to S3.
    if init:
        main_dict['terraform']['backend']['local'] = {
            'path': 'terraform.tfstate',
        }
    else:
        terraform_bucket_name, _ = terraform_state_bucket(config)
        main_dict['terraform']['backend']['s3'] = {
            'bucket': terraform_bucket_name,
            'key': config['global'].get('terraform', {}).get(
                'state_key_name',
                'streamalert_state/terraform.tfstate'
            ),
            'region': config['global']['account']['region'],
            'encrypt': True,
            'dynamodb_table': state_lock_table_name,
            'acl': 'private',
            'kms_key_id': 'alias/{}'.format(
                config['global']['account'].get(
                    'kms_key_alias',
                    '{}_streamalert_secrets'.format(config['global']['account']['prefix'])
                )
            ),
        }

    # Configure initial S3 buckets
    main_dict['resource']['aws_s3_bucket'] = {
        'streamalerts': generate_s3_bucket(
            bucket=firehose_alerts_bucket(config),
            logging=logging_bucket
        )
    }

    # Configure remote state locking table
    main_dict['resource']['aws_dynamodb_table'] = {
        'terraform_remote_state_lock': {
            'name': state_lock_table_name,
            'billing_mode': 'PAY_PER_REQUEST',
            'hash_key': 'LockID',
            'attribute': {
                'name': 'LockID',
                'type': 'S'
            },
            'tags': {
                'Name': 'StreamAlert'
            }
        }
    }

    # Create bucket for S3 access logs (if applicable)
    if create_logging_bucket:
        main_dict['resource']['aws_s3_bucket']['logging_bucket'] = generate_s3_bucket(
            bucket=logging_bucket,
            logging=logging_bucket,
            acl='log-delivery-write',
            lifecycle_rule={
                'prefix': '/',
                'enabled': True,
                'transition': {
                    'days': 365,
                    'storage_class': 'GLACIER'
                }
            },
            sse_algorithm='AES256'  # SSE-KMS doesn't seem to work with access logs
        )

    terraform_bucket_name, create_state_bucket = terraform_state_bucket(config)

    # Create bucket for Terraform state (if applicable)
    if create_state_bucket:
        main_dict['resource']['aws_s3_bucket']['terraform_remote_state'] = generate_s3_bucket(
            bucket=terraform_bucket_name,
            logging=logging_bucket
        )

    # Setup Firehose Delivery Streams
    generate_firehose(logging_bucket, main_dict, config)

    # Configure global resources like Firehose alert delivery and alerts table
    main_dict['module']['globals'] = _generate_global_module(config)

    # KMS Key and Alias creation
    main_dict['resource']['aws_kms_key']['server_side_encryption'] = {
        'enable_key_rotation': True,
        'description': 'StreamAlert S3 Server-Side Encryption',
        'policy': json.dumps({
            'Version': '2012-10-17',
            'Statement': [
                {
                    'Sid': 'Enable IAM User Permissions',
                    'Effect': 'Allow',
                    'Principal': {
                        'AWS': 'arn:aws:iam::{}:root'.format(
                            config['global']['account']['aws_account_id']
                        )
                    },
                    'Action': 'kms:*',
                    'Resource': '*'
                },
                {
                    'Sid': 'Allow principals in the account to use the key',
                    'Effect': 'Allow',
                    'Principal': '*',
                    'Action': ['kms:Decrypt', 'kms:GenerateDataKey*', 'kms:Encrypt'],
                    'Resource': '*',
                    'Condition': {
                        'StringEquals': {
                            'kms:CallerAccount': config['global']['account']['aws_account_id']
                        }
                    }
                }
            ]
        })
    }
    main_dict['resource']['aws_kms_alias']['server_side_encryption'] = {
        'name': 'alias/{}_server-side-encryption'.format(config['global']['account']['prefix']),
        'target_key_id': '${aws_kms_key.server_side_encryption.key_id}'
    }

    main_dict['resource']['aws_kms_key']['streamalert_secrets'] = {
        'enable_key_rotation': True,
        'description': 'StreamAlert secret management'
    }
    main_dict['resource']['aws_kms_alias']['streamalert_secrets'] = {
        'name': 'alias/{}'.format(
            config['global']['account'].get(
                'kms_key_alias',
                '{}_streamalert_secrets'.format(config['global']['account']['prefix'])
            )
        ),
        'target_key_id': '${aws_kms_key.streamalert_secrets.key_id}'
    }

    # Global infrastructure settings
    topic_name, create_topic = monitoring_topic_name(config)
    if create_topic:
        main_dict['resource']['aws_sns_topic']['monitoring'] = {
            'name': topic_name
        }

    return main_dict
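# A short usage sketch: generate the init-phase main.tf.json and write it to
# disk. The *.tf.json filename follows Terraform's JSON-syntax convention; the
# path here is hypothetical, as the real CLI decides where generated files go.
def _write_main_tf_sketch(config, path='terraform/main.tf.json'):
    with open(path, 'w') as tf_file:
        json.dump(generate_main(config, init=True), tf_file, indent=2, sort_keys=True)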
def create_table(table, bucket, config, schema_override=None):
    """Create a 'streamalert' Athena table

    Args:
        table (str): The name of the table being rebuilt
        bucket (str): The s3 bucket to be used as the location for Athena data
        config (CLIConfig): Loaded StreamAlert config
        schema_override (set): An optional set of key=value pairs to be used for
            overriding the configured column_name=value_type.

    Returns:
        bool: False if errors occurred, True otherwise
    """
    enabled_logs = FirehoseClient.load_enabled_log_sources(
        config['global']['infrastructure']['firehose'],
        config['logs']
    )

    # Convert special characters in schema name to underscores
    sanitized_table_name = FirehoseClient.firehose_log_name(table)

    # Check that the log type is enabled via Firehose
    if sanitized_table_name != 'alerts' and sanitized_table_name not in enabled_logs:
        LOGGER.error('Table name %s missing from configuration or is not enabled.',
                     sanitized_table_name)
        return False

    athena_client = get_athena_client(config)

    config_data_bucket = firehose_data_bucket(config)
    if not config_data_bucket:
        LOGGER.error('The \'firehose\' module is not enabled in global.json')
        return False

    # Check if the table exists
    if athena_client.check_table_exists(sanitized_table_name):
        LOGGER.info('The \'%s\' table already exists.', sanitized_table_name)
        return False

    if table == 'alerts':
        # get a fake alert so we can get the keys needed and their types
        alert = Alert('temp_rule_name', {}, {})
        output = alert.output_dict()
        schema = record_to_schema(output)
        athena_schema = helpers.logs_schema_to_athena_schema(schema)

        # Use the bucket if supplied, otherwise use the default alerts bucket
        bucket = bucket or firehose_alerts_bucket(config)

        query = _construct_create_table_statement(
            schema=athena_schema, table_name=table, bucket=bucket)

    else:  # all other tables are log types
        # Use the bucket if supplied, otherwise use the default data bucket
        bucket = bucket or config_data_bucket

        log_info = config['logs'][table.replace('_', ':', 1)]

        schema = dict(log_info['schema'])
        sanitized_schema = FirehoseClient.sanitize_keys(schema)

        athena_schema = helpers.logs_schema_to_athena_schema(sanitized_schema)

        # Add envelope keys to Athena Schema
        configuration_options = log_info.get('configuration')
        if configuration_options:
            envelope_keys = configuration_options.get('envelope_keys')
            if envelope_keys:
                sanitized_envelope_key_schema = FirehoseClient.sanitize_keys(envelope_keys)
                # Note: this key is wrapped in backticks to be Hive compliant
                athena_schema['`streamalert:envelope_keys`'] = \
                    helpers.logs_schema_to_athena_schema(sanitized_envelope_key_schema)

        # Handle Schema overrides
        #   This is useful when an Athena schema needs to differ from the normal log schema
        if schema_override:
            for override in schema_override:
                column_name, column_type = override.split('=')
                # Columns are escaped to avoid Hive issues with special characters
                column_name = '`{}`'.format(column_name)
                if column_name in athena_schema:
                    athena_schema[column_name] = column_type
                    LOGGER.info('Applied schema override: %s:%s', column_name, column_type)
                else:
                    LOGGER.error(
                        'Schema override column %s not found in Athena Schema, skipping',
                        column_name
                    )

        query = _construct_create_table_statement(
            schema=athena_schema, table_name=sanitized_table_name, bucket=bucket)

    success = athena_client.run_query(query=query)
    if not success:
        LOGGER.error('The %s table could not be created', sanitized_table_name)
        return False

    # Update the CLI config
    if table != 'alerts' and bucket != config_data_bucket:
        # Only add buckets to the config if they are not one of the default/configured buckets
        # Ensure 'buckets' exists in the config (since it is not required)
        config['lambda']['athena_partition_refresh_config']['buckets'] = (
            config['lambda']['athena_partition_refresh_config'].get('buckets', {})
        )
        if bucket not in config['lambda']['athena_partition_refresh_config']['buckets']:
            config['lambda']['athena_partition_refresh_config']['buckets'][bucket] = 'data'
            config.write()

    LOGGER.info('The %s table was successfully created!', sanitized_table_name)
    return True
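# Usage sketch (illustrative names only): rebuild the table for a hypothetical
# 'cloudwatch_events' log type, overriding one column to a Hive map type. The
# override string matches the key=value parsing above.
#
#   create_table(
#       'cloudwatch_events',
#       'acme-streamalert-data',
#       config,
#       schema_override={'detail=map<string,string>'}
#   )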
def handler(cls, options, config):
    """Initialize infrastructure using Terraform

    Args:
        config (CLIConfig): Loaded StreamAlert config

    Returns:
        bool: False if errors occurred, True otherwise
    """
    LOGGER.info('Initializing StreamAlert')

    # generate init Terraform files
    if not terraform_generate_handler(config=config, init=True):
        return False

    LOGGER.info('Initializing Terraform')
    if not run_command(['terraform', 'init'], cwd=config.build_directory):
        return False

    # build init infrastructure
    LOGGER.info('Building initial infrastructure')
    init_targets = [
        'aws_s3_bucket.lambda_source',
        'aws_s3_bucket.logging_bucket',
        'aws_s3_bucket.streamalert_secrets',
        'aws_s3_bucket.terraform_remote_state',
        'aws_s3_bucket.streamalerts',
        'aws_kms_key.server_side_encryption',
        'aws_kms_alias.server_side_encryption',
        'aws_kms_key.streamalert_secrets',
        'aws_kms_alias.streamalert_secrets',
        'module.streamalert_athena',  # required for the alerts table
        'aws_dynamodb_table.terraform_remote_state_lock'
    ]

    # this bucket must exist before the log tables can be created, but
    # shouldn't be created unless the firehose is enabled
    if config['global']['infrastructure'].get('firehose', {}).get('enabled'):
        init_targets.append('aws_s3_bucket.streamalert_data')

    if not terraform_runner(config, targets=init_targets):
        LOGGER.error('An error occurred while running StreamAlert init')
        return False

    # generate the main.tf with remote state enabled
    LOGGER.info('Configuring Terraform Remote State')
    if not terraform_generate_handler(config=config, check_tf=False, check_creds=False):
        return False

    if not run_command(['terraform', 'init'], cwd=config.build_directory):
        return False

    LOGGER.info('Deploying Lambda Functions')

    functions = ['rule', 'alert', 'alert_merger', 'athena', 'classifier']
    deploy(config, functions)

    # we need to manually create the streamalerts table since terraform does not support this
    # See: https://github.com/terraform-providers/terraform-provider-aws/issues/1486
    if get_data_file_format(config) == 'json':
        # Terraform v0.12 supports creating Athena tables, so the
        # aws_glue_catalog_table resource is used to create tables only when
        # the data file_format is set to "parquet" in "athena_partitioner_config".
        #
        # For the "json" file_format, an Athena DDL query is still used to
        # create tables. However, this capability will be phased out in a
        # future release so that users take advantage of parquet performance.
        alerts_bucket = firehose_alerts_bucket(config)
        create_table('alerts', alerts_bucket, config)

        # Create the glue catalog tables for the enabled logs
        if not create_log_tables(config=config):
            return False

    LOGGER.info('Building remaining infrastructure')
    return terraform_runner(config, refresh=False)
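# For reference, a minimal sketch of the file-format check that gates the DDL
# path above, assuming the setting lives under 'athena_partitioner_config' as
# the comment suggests and defaults to 'json'; the real get_data_file_format
# may resolve this differently.
def _data_file_format_sketch(config):
    return config['lambda'].get('athena_partitioner_config', {}).get('file_format', 'json')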