Example #1
    def handler(cls, options, config):
        """Initialize infrastructure using Terraform

        Args:
            options (argparse.Namespace): Parsed CLI options
            config (CLIConfig): Loaded StreamAlert config

        Returns:
            bool: False if errors occurred, True otherwise
        """

        # Stop here if only initializing the backend
        if options.backend:
            return cls._terraform_init_backend(config)

        LOGGER.info('Initializing StreamAlert')

        # generate init Terraform files
        if not terraform_generate_handler(config=config, init=True):
            return False

        LOGGER.info('Initializing Terraform')
        if not run_command(['terraform', 'init']):
            return False

        # build init infrastructure
        LOGGER.info('Building initial infrastructure')
        init_targets = [
            'aws_s3_bucket.lambda_source', 'aws_s3_bucket.logging_bucket',
            'aws_s3_bucket.streamalert_secrets', 'aws_s3_bucket.terraform_remote_state',
            'aws_s3_bucket.streamalerts',
            'aws_kms_key.server_side_encryption', 'aws_kms_alias.server_side_encryption',
            'aws_kms_key.streamalert_secrets', 'aws_kms_alias.streamalert_secrets',
            'aws_dynamodb_table.terraform_remote_state_lock'
        ]
        if not tf_runner(targets=init_targets):
            LOGGER.error('An error occurred while running StreamAlert init')
            return False

        # generate the main.tf with remote state enabled
        LOGGER.info('Configuring Terraform Remote State')
        if not terraform_generate_handler(config=config, check_tf=False, check_creds=False):
            return False

        if not run_command(['terraform', 'init']):
            return False

        LOGGER.info('Deploying Lambda Functions')

        functions = ['rule', 'alert', 'alert_merger', 'athena', 'classifier']

        deploy(functions, config)

        # we need to manually create the streamalerts table since terraform does not support this
        # See: https://github.com/terraform-providers/terraform-provider-aws/issues/1486
        alerts_bucket = firehose_alerts_bucket(config)
        create_table('alerts', alerts_bucket, config)

        LOGGER.info('Building remaining infrastructure')
        return tf_runner(refresh=False)
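
A minimal sketch of the run_command helper used above, assuming it simply wraps subprocess and reports success via the exit code (hypothetical; the real StreamAlert helper may differ):

import subprocess

def run_command(args, cwd=None):
    """Run a command, returning True on a zero exit code (sketch, not the real helper)."""
    try:
        subprocess.run(args, cwd=cwd, check=True)
    except (OSError, subprocess.CalledProcessError):
        return False
    return True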
Example #2
def generate_rule_promotion(config):
    """Generate Terraform for the Rule Promotion function

    Args:
        config (dict): The loaded config from the 'conf/' directory

    Returns:
        dict: Rule Promotion dict to be marshaled to JSON
    """
    # The Rule Promotion Lambda function is dependent on the rule staging feature
    # being enabled, so do not generate the code for this Lambda function if it
    # is not enabled
    if not config['global']['infrastructure']['rule_staging'].get('enabled', False):
        return False

    result = infinitedict()

    alerts_bucket = firehose_alerts_bucket(config)

    # Set variables for the IAM permissions, etc module
    result['module']['rule_promotion_iam'] = {
        'source': './modules/tf_rule_promotion_iam',
        'send_digest_schedule_expression':
            config['lambda']['rule_promotion_config']['send_digest_schedule_expression'],
        'digest_sns_topic': StatsPublisher.formatted_sns_topic_arn(config).split(':')[-1],
        'role_id': '${module.rule_promotion_lambda.role_id}',
        'rules_table_arn': '${module.globals.rules_table_arn}',
        'function_alias_arn': '${module.rule_promotion_lambda.function_alias_arn}',
        'function_name': '${module.rule_promotion_lambda.function_name}',
        'athena_results_bucket_arn': '${module.streamalert_athena.results_bucket_arn}',
        'alerts_bucket': alerts_bucket,
        's3_kms_key_arn': '${aws_kms_key.server_side_encryption.arn}'
    }

    # Set variables for the Lambda module
    result['module']['rule_promotion_lambda'] = generate_lambda(
        '{}_streamalert_{}'.format(config['global']['account']['prefix'],
                                   RULE_PROMOTION_NAME),
        RulePromotionPackage.package_name + '.zip',
        RulePromotionPackage.lambda_handler,
        config['lambda']['rule_promotion_config'], config)

    return result
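
The infinitedict helper used above (and in Example #4) is presumably a recursively nested defaultdict; a minimal sketch under that assumption:

from collections import defaultdict

def infinitedict():
    """Return a dict whose missing keys create nested dicts on access (sketch)."""
    return defaultdict(infinitedict)

result = infinitedict()
result['module']['rule_promotion_iam']['role_id'] = '${module.rule_promotion_lambda.role_id}'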
Example #3
    def buckets_from_config(cls, config):
        """Get the buckets from default buckets and additionally configured ones

        Args:
            config (dict): The loaded config from the 'conf/' directory

        Returns:
            dict: Mapping of bucket name to data type for which Athena is enabled
        """
        athena_config = config['lambda']['athena_partition_refresh_config']
        data_buckets = athena_config.get('buckets', {})
        data_buckets[firehose_alerts_bucket(config)] = 'alerts'
        # Data retention is optional, so check whether the data bucket exists
        data_bucket = firehose_data_bucket(config)
        if data_bucket:
            data_buckets[data_bucket] = 'data'

        return data_buckets
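
For context, a standalone sketch of the mapping this method builds, using plain dicts in place of the real config and hypothetical bucket names:

def buckets_sketch(configured_buckets, alerts_bucket, data_bucket=None):
    """Merge configured buckets with the default alerts/data buckets (sketch)."""
    data_buckets = dict(configured_buckets)
    data_buckets[alerts_bucket] = 'alerts'
    if data_bucket:  # data retention is optional
        data_buckets[data_bucket] = 'data'
    return data_buckets

# {'my-extra-bucket': 'data', 'example-prefix-streamalerts': 'alerts',
#  'example-prefix-streamalert-data': 'data'}
print(buckets_sketch(
    {'my-extra-bucket': 'data'},            # additionally configured (hypothetical)
    'example-prefix-streamalerts',          # hypothetical alerts bucket
    'example-prefix-streamalert-data'))     # hypothetical data bucket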
Example #4
def generate_main(config, init=False):
    """Generate the main.tf.json Terraform dict

    Args:
        config (CLIConfig): The loaded CLI config
        init (bool): Whether Terraform is running in the init phase (optional)

    Returns:
        dict: main.tf.json Terraform dict
    """
    write_vars(config, region=config['global']['account']['region'])

    main_dict = infinitedict()

    logging_bucket, create_logging_bucket = s3_access_logging_bucket(config)

    state_lock_table_name = '{}_streamalert_terraform_state_lock'.format(
        config['global']['account']['prefix']
    )
    # Set up the backend depending on the deployment phase.
    # When first setting up StreamAlert, the Terraform state file is stored
    # locally. After the first dependencies are created, it moves to S3.
    if init:
        main_dict['terraform']['backend']['local'] = {
            'path': 'terraform.tfstate',
        }
    else:
        terraform_bucket_name, _ = terraform_state_bucket(config)
        main_dict['terraform']['backend']['s3'] = {
            'bucket': terraform_bucket_name,
            'key': config['global'].get('terraform', {}).get(
                'state_key_name',
                'streamalert_state/terraform.tfstate'
            ),
            'region': config['global']['account']['region'],
            'encrypt': True,
            'dynamodb_table': state_lock_table_name,
            'acl': 'private',
            'kms_key_id': 'alias/{}'.format(
                config['global']['account'].get(
                    'kms_key_alias',
                    '{}_streamalert_secrets'.format(config['global']['account']['prefix'])
                )
            ),
        }

    # Configure initial S3 buckets
    main_dict['resource']['aws_s3_bucket'] = {
        'streamalerts': generate_s3_bucket(
            bucket=firehose_alerts_bucket(config),
            logging=logging_bucket
        )
    }

    # Configure remote state locking table
    main_dict['resource']['aws_dynamodb_table'] = {
        'terraform_remote_state_lock': {
            'name': state_lock_table_name,
            'billing_mode': 'PAY_PER_REQUEST',
            'hash_key': 'LockID',
            'attribute': {
                'name': 'LockID',
                'type': 'S'
            },
            'tags': {
                'Name': 'StreamAlert'
            }
        }
    }

    # Create bucket for S3 access logs (if applicable)
    if create_logging_bucket:
        main_dict['resource']['aws_s3_bucket']['logging_bucket'] = generate_s3_bucket(
            bucket=logging_bucket,
            logging=logging_bucket,
            acl='log-delivery-write',
            lifecycle_rule={
                'prefix': '/',
                'enabled': True,
                'transition': {
                    'days': 365,
                    'storage_class': 'GLACIER'
                }
            },
            sse_algorithm='AES256'  # SSE-KMS doesn't seem to work with access logs
        )

    terraform_bucket_name, create_state_bucket = terraform_state_bucket(config)
    # Create bucket for Terraform state (if applicable)
    if create_state_bucket:
        main_dict['resource']['aws_s3_bucket']['terraform_remote_state'] = generate_s3_bucket(
            bucket=terraform_bucket_name,
            logging=logging_bucket
        )

    # Setup Firehose Delivery Streams
    generate_firehose(logging_bucket, main_dict, config)

    # Configure global resources like Firehose alert delivery and alerts table
    main_dict['module']['globals'] = _generate_global_module(config)

    # KMS Key and Alias creation
    main_dict['resource']['aws_kms_key']['server_side_encryption'] = {
        'enable_key_rotation': True,
        'description': 'StreamAlert S3 Server-Side Encryption',
        'policy': json.dumps({
            'Version': '2012-10-17',
            'Statement': [
                {
                    'Sid': 'Enable IAM User Permissions',
                    'Effect': 'Allow',
                    'Principal': {
                        'AWS': 'arn:aws:iam::{}:root'.format(
                            config['global']['account']['aws_account_id']
                        )
                    },
                    'Action': 'kms:*',
                    'Resource': '*'
                },
                {
                    'Sid': 'Allow principals in the account to use the key',
                    'Effect': 'Allow',
                    'Principal': '*',
                    'Action': ['kms:Decrypt', 'kms:GenerateDataKey*', 'kms:Encrypt'],
                    'Resource': '*',
                    'Condition': {
                        'StringEquals': {
                            'kms:CallerAccount': config['global']['account']['aws_account_id']
                        }
                    }
                }
            ]
        })
    }
    main_dict['resource']['aws_kms_alias']['server_side_encryption'] = {
        'name': 'alias/{}_server-side-encryption'.format(config['global']['account']['prefix']),
        'target_key_id': '${aws_kms_key.server_side_encryption.key_id}'
    }

    main_dict['resource']['aws_kms_key']['streamalert_secrets'] = {
        'enable_key_rotation': True,
        'description': 'StreamAlert secret management'
    }
    main_dict['resource']['aws_kms_alias']['streamalert_secrets'] = {
        'name': 'alias/{}'.format(
            config['global']['account'].get(
                'kms_key_alias',
                '{}_streamalert_secrets'.format(config['global']['account']['prefix'])
            )
        ),
        'target_key_id': '${aws_kms_key.streamalert_secrets.key_id}'
    }

    # Global infrastructure settings
    topic_name, create_topic = monitoring_topic_name(config)
    if create_topic:
        main_dict['resource']['aws_sns_topic']['monitoring'] = {
            'name': topic_name
        }

    return main_dict
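
The dict returned here is presumably marshaled to main.tf.json for Terraform to consume; a minimal sketch of that final step (file name taken from the docstring, serialization details assumed):

import json

def write_main_tf_json(main_dict, path='main.tf.json'):
    """Serialize the Terraform dict to JSON on disk (sketch)."""
    with open(path, 'w') as handle:
        json.dump(main_dict, handle, indent=2, sort_keys=True)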
Example #5
def create_table(table, bucket, config, schema_override=None):
    """Create a 'streamalert' Athena table

    Args:
        table (str): The name of the table being created
        bucket (str): The S3 bucket to be used as the location for Athena data
        config (CLIConfig): Loaded StreamAlert config
        schema_override (set): An optional set of 'key=value' pairs used to
            override the configured column_name=value_type.

    Returns:
        bool: False if errors occurred, True otherwise
    """
    enabled_logs = FirehoseClient.load_enabled_log_sources(
        config['global']['infrastructure']['firehose'], config['logs'])

    # Convert special characters in the table name to underscores
    sanitized_table_name = FirehoseClient.firehose_log_name(table)

    # Check that the log type is enabled via Firehose
    if sanitized_table_name != 'alerts' and sanitized_table_name not in enabled_logs:
        LOGGER.error(
            'Table name %s is missing from the configuration or '
            'is not enabled.', sanitized_table_name)
        return False

    athena_client = get_athena_client(config)

    config_data_bucket = firehose_data_bucket(config)
    if not config_data_bucket:
        LOGGER.error('The \'firehose\' module is not enabled in global.json')
        return False

    # Check if the table exists
    if athena_client.check_table_exists(sanitized_table_name):
        LOGGER.info('The \'%s\' table already exists.', sanitized_table_name)
        return False

    if table == 'alerts':
        # get a fake alert so we can get the keys needed and their types
        alert = Alert('temp_rule_name', {}, {})
        output = alert.output_dict()
        schema = record_to_schema(output)
        athena_schema = helpers.logs_schema_to_athena_schema(schema)

        # Use the bucket if supplied, otherwise use the default alerts bucket
        bucket = bucket or firehose_alerts_bucket(config)

        query = _construct_create_table_statement(schema=athena_schema,
                                                  table_name=table,
                                                  bucket=bucket)

    else:  # all other tables are log types

        # Use the bucket if supplied, otherwise use the default data bucket
        bucket = bucket or config_data_bucket

        log_info = config['logs'][table.replace('_', ':', 1)]

        schema = dict(log_info['schema'])
        sanitized_schema = FirehoseClient.sanitize_keys(schema)

        athena_schema = helpers.logs_schema_to_athena_schema(sanitized_schema)

        # Add envelope keys to Athena Schema
        configuration_options = log_info.get('configuration')
        if configuration_options:
            envelope_keys = configuration_options.get('envelope_keys')
            if envelope_keys:
                sanitized_envelope_key_schema = FirehoseClient.sanitize_keys(envelope_keys)
                # Note: this key is wrapped in backticks to be Hive compliant
                athena_schema['`streamalert:envelope_keys`'] = (
                    helpers.logs_schema_to_athena_schema(sanitized_envelope_key_schema))

        # Handle Schema overrides
        #   This is useful when an Athena schema needs to differ from the normal log schema
        if schema_override:
            for override in schema_override:
                column_name, column_type = override.split('=')
                # Columns are escaped to avoid Hive issues with special characters
                column_name = '`{}`'.format(column_name)
                if column_name in athena_schema:
                    athena_schema[column_name] = column_type
                    LOGGER.info('Applied schema override: %s:%s', column_name,
                                column_type)
                else:
                    LOGGER.error(
                        'Schema override column %s not found in Athena Schema, skipping',
                        column_name)

        query = _construct_create_table_statement(
            schema=athena_schema,
            table_name=sanitized_table_name,
            bucket=bucket)

    success = athena_client.run_query(query=query)
    if not success:
        LOGGER.error('The %s table could not be created', sanitized_table_name)
        return False

    # Update the CLI config
    if table != 'alerts' and bucket != config_data_bucket:
        # Only add buckets to the config if they are not one of the default/configured
        # buckets. Ensure 'buckets' exists in the config (since it is not required)
        buckets = config['lambda']['athena_partition_refresh_config'].setdefault('buckets', {})
        if bucket not in buckets:
            buckets[bucket] = 'data'
            config.write()

    LOGGER.info('The %s table was successfully created!', sanitized_table_name)

    return True
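
A rough sketch of the kind of DDL a helper like _construct_create_table_statement could produce (hypothetical; the real statement would also set partitioning and SerDe options not shown here):

def construct_create_table_sketch(schema, table_name, bucket):
    """Build a bare-bones Athena CREATE EXTERNAL TABLE statement (sketch)."""
    columns = ',\n  '.join(
        '{} {}'.format(name, col_type) for name, col_type in schema.items())
    return "CREATE EXTERNAL TABLE {} (\n  {}\n)\nLOCATION 's3://{}/'".format(
        table_name, columns, bucket)

print(construct_create_table_sketch(
    {'`id`': 'string', '`count`': 'bigint'},    # hypothetical columns
    'alerts',
    'example-prefix-streamalerts'))             # hypothetical bucket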
Example #6
    def handler(cls, options, config):
        """Initialize infrastructure using Terraform

        Args:
            options (argparse.Namespace): Parsed CLI options
            config (CLIConfig): Loaded StreamAlert config

        Returns:
            bool: False if errors occurred, True otherwise
        """
        LOGGER.info('Initializing StreamAlert')

        # generate init Terraform files
        if not terraform_generate_handler(config=config, init=True):
            return False

        LOGGER.info('Initializing Terraform')
        if not run_command(['terraform', 'init'], cwd=config.build_directory):
            return False

        # build init infrastructure
        LOGGER.info('Building initial infrastructure')
        init_targets = [
            'aws_s3_bucket.lambda_source',
            'aws_s3_bucket.logging_bucket',
            'aws_s3_bucket.streamalert_secrets',
            'aws_s3_bucket.terraform_remote_state',
            'aws_s3_bucket.streamalerts',
            'aws_kms_key.server_side_encryption',
            'aws_kms_alias.server_side_encryption',
            'aws_kms_key.streamalert_secrets',
            'aws_kms_alias.streamalert_secrets',
            'module.streamalert_athena',  # required for the alerts table
            'aws_dynamodb_table.terraform_remote_state_lock'
        ]

        # this bucket must exist before the log tables can be created, but
        # shouldn't be created unless the firehose is enabled
        if config['global']['infrastructure'].get('firehose', {}).get('enabled'):
            init_targets.append('aws_s3_bucket.streamalert_data')

        if not terraform_runner(config, targets=init_targets):
            LOGGER.error('An error occurred while running StreamAlert init')
            return False

        # generate the main.tf with remote state enabled
        LOGGER.info('Configuring Terraform Remote State')
        if not terraform_generate_handler(
                config=config, check_tf=False, check_creds=False):
            return False

        if not run_command(['terraform', 'init'], cwd=config.build_directory):
            return False

        LOGGER.info('Deploying Lambda Functions')

        functions = ['rule', 'alert', 'alert_merger', 'athena', 'classifier']

        deploy(config, functions)

        # we need to manually create the streamalerts table since terraform does not support this
        # See: https://github.com/terraform-providers/terraform-provider-aws/issues/1486
        if get_data_file_format(config) == 'json':
            # Terraform v0.12 now supports creating Athena tables. We use the
            # Terraform aws_glue_catalog_table resource to create tables only
            # when the data file_format is set to "parquet" in
            # "athena_partitioner_config".
            #
            # For the "json" file_format, we continue to use Athena DDL queries
            # to create tables. However, this capability will be phased out in a
            # future release because we want users to take advantage of Parquet
            # performance.
            alerts_bucket = firehose_alerts_bucket(config)
            create_table('alerts', alerts_bucket, config)

            # Create the glue catalog tables for the enabled logs
            if not create_log_tables(config=config):
                return False

        LOGGER.info('Building remaining infrastructure')
        return terraform_runner(config, refresh=False)
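
For reference, the handler above inspects config keys shaped roughly like this (values and overall layout are illustrative assumptions based on the lookups in the code):

config_sketch = {
    'global': {
        'infrastructure': {
            'firehose': {'enabled': True}    # gates aws_s3_bucket.streamalert_data
        }
    },
    'lambda': {
        'athena_partitioner_config': {
            'file_format': 'json'            # 'parquet' would create tables via Terraform
        }
    }
}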