    def set_prefix(self, prefix):
        """Set the Org Prefix in Global settings"""
        if not isinstance(prefix, (unicode, str)):
            LOGGER_CLI.error('Invalid prefix type, must be string')
            return

        if '_' in prefix:
            LOGGER_CLI.error('Prefix cannot contain underscores')
            return

        self.config['global']['account']['prefix'] = prefix
        self.config['global']['terraform']['tfstate_bucket'] = \
            self.config['global']['terraform']['tfstate_bucket'].replace(
                'PREFIX_GOES_HERE', prefix)

        # Swap the 'PREFIX_GOES_HERE' placeholder in each Lambda function's source bucket
        lambda_configs = [
            'alert_processor_config',
            'rule_processor_config',
            'stream_alert_apps_config',
            'threat_intel_downloader_config'
        ]
        for func_config in lambda_configs:
            if not self.config['lambda'].get(func_config):
                continue
            self.config['lambda'][func_config]['source_bucket'] = \
                self.config['lambda'][func_config]['source_bucket'].replace(
                    'PREFIX_GOES_HERE', prefix)
        self.write()

        LOGGER_CLI.info('Prefix successfully configured')
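For reference, a minimal sketch (prefix value and default bucket name assumed) of the placeholder substitution this variant relies on:

# Hypothetical before/after of the 'PREFIX_GOES_HERE' replacement
config = {'global': {'terraform': {
    'tfstate_bucket': 'PREFIX_GOES_HERE.streamalert.terraform.state'}}}
prefix = 'acme'
config['global']['terraform']['tfstate_bucket'] = \
    config['global']['terraform']['tfstate_bucket'].replace('PREFIX_GOES_HERE', prefix)
assert config['global']['terraform']['tfstate_bucket'] == 'acme.streamalert.terraform.state'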
Example #2
def terraform_generate(**kwargs):
    """Generate all Terraform plans for the configured clusters.

    Keyword Args:
        config (dict): The loaded config from the 'conf/' directory
        init (bool): Indicates if main.tf is generated for `terraform init`
    """
    config = kwargs.get('config')
    init = kwargs.get('init', False)

    # Setup main
    LOGGER_CLI.info('Generating cluster file: main.tf')
    main_json = json.dumps(generate_main(init=init, config=config),
                           indent=2,
                           sort_keys=True)
    with open('terraform/main.tf', 'w') as tf_file:
        tf_file.write(main_json)

    # Break out early during the init process, clusters aren't needed yet
    if init:
        return True

    # Setup clusters
    for cluster in config.clusters():
        if cluster == 'main':
            raise InvalidClusterName(
                'Rename cluster "main" to something else!')

        LOGGER_CLI.info('Generating cluster file: %s.tf', cluster)
        cluster_json = json.dumps(generate_cluster(cluster_name=cluster,
                                                   config=config),
                                  indent=2,
                                  sort_keys=True)
        with open('terraform/{}.tf'.format(cluster), 'w') as tf_file:
            tf_file.write(cluster_json)

    return True
Example #3
def tf_runner(action='apply', refresh=True, auto_approve=False, targets=None):
    """Terraform wrapper to build StreamAlert infrastructure.

    Resolves modules with `terraform get` before continuing.

    Args:
        action (str): Terraform action ('apply' or 'destroy').
        refresh (bool): If True, Terraform will refresh its state before applying the change.
        auto_approve (bool): If True, Terraform will *not* prompt the user for approval.
        targets (list): Optional list of affected targets.
            If not specified, Terraform will run against all of its resources.

    Returns:
        bool: True if the terraform command was successful
    """
    LOGGER_CLI.debug('Resolving Terraform modules')
    if not run_command(['terraform', 'get'], quiet=True):
        return False

    tf_command = [
        'terraform', action, '-var-file=../conf/lambda.json',
        '-refresh={}'.format(str(refresh).lower())
    ]

    if action == 'destroy':
        # Terraform destroy has a '-force' flag instead of '-auto-approve'
        LOGGER_CLI.info('Destroying infrastructure')
        tf_command.append('-force={}'.format(str(auto_approve).lower()))
    else:
        LOGGER_CLI.info('%s changes',
                        'Applying' if auto_approve else 'Planning')
        tf_command.append('-auto-approve={}'.format(str(auto_approve).lower()))

    if targets:
        tf_command.extend('-target={}'.format(x) for x in targets)

    return run_command(tf_command)
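To make the flag handling above concrete, this is roughly the command list a targeted destroy with auto-approval would produce; the module names are hypothetical:

# Equivalent to: tf_runner(action='destroy', auto_approve=True,
#                          targets=['module.kinesis_prod', 'module.cloudtrail_prod'])
expected_command = [
    'terraform', 'destroy', '-var-file=../conf/lambda.json',
    '-refresh=true',
    '-force=true',
    '-target=module.kinesis_prod',
    '-target=module.cloudtrail_prod'
]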
Example #4
    def set_prefix(self, prefix):
        """Set the Org Prefix in Global settings"""
        if not isinstance(prefix, (unicode, str)):
            LOGGER_CLI.error('Invalid prefix type, must be string')
            return

        if '_' in prefix:
            LOGGER_CLI.error('Prefix cannot contain underscores')
            return

        tf_state_bucket = '{}.streamalert.terraform.state'.format(prefix)
        self.config['global']['account']['prefix'] = prefix
        self.config['global']['account']['kms_key_alias'] = '{}_streamalert_secrets'.format(prefix)
        self.config['global']['terraform']['tfstate_bucket'] = tf_state_bucket
        self.config['lambda']['athena_partition_refresh_config']['buckets'].clear()
        self.config['lambda']['athena_partition_refresh_config']['buckets'] \
            ['{}.streamalerts'.format(prefix)] = 'alerts'

        lambda_funcs = [
            'alert_merger',
            'alert_processor',
            'athena_partition_refresh',
            'rule_processor',
            'stream_alert_apps',
            'threat_intel_downloader'
        ]

        # Update all function configurations with the source streamalert source bucket info
        source_bucket = '{}.streamalert.source'.format(prefix)
        for func in lambda_funcs:
            func_config = '{}_config'.format(func)
            if func_config in self.config['lambda']:
                self.config['lambda'][func_config]['source_bucket'] = source_bucket

        self.write()

        LOGGER_CLI.info('Prefix successfully configured')
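As a quick illustration of the naming conventions above, a hypothetical prefix of 'acme' yields resource names like these:

# Derived resource names for an example prefix, per the format strings above
prefix = 'acme'
tf_state_bucket = '{}.streamalert.terraform.state'.format(prefix)  # acme.streamalert.terraform.state
kms_key_alias = '{}_streamalert_secrets'.format(prefix)            # acme_streamalert_secrets
alerts_bucket = '{}.streamalerts'.format(prefix)                   # acme.streamalerts
source_bucket = '{}.streamalert.source'.format(prefix)             # acme.streamalert.source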
Example #5
def tf_runner(**kwargs):
    """Terraform wrapper to build StreamAlert infrastructure.

    Steps:
        - resolve modules with `terraform get`
        - run `terraform plan` for the given targets
        - if plan is successful and user confirms prompt,
          then the infrastructure is applied

    kwargs:
        targets: a list of Terraform targets
        action: 'apply' or 'destroy'

    Returns:
        bool: True if the terraform command was successful
    """
    targets = kwargs.get('targets', [])
    action = kwargs.get('action', None)
    tf_action_index = 1  # The index to the terraform 'action'

    var_files = {'conf/lambda.json'}
    tf_opts = ['-var-file=../{}'.format(x) for x in var_files]
    tf_targets = ['-target={}'.format(x) for x in targets]
    tf_command = ['terraform', 'plan'] + tf_opts + tf_targets
    if action == 'destroy':
        tf_command.append('-destroy')

    LOGGER_CLI.debug('Resolving Terraform modules')
    if not run_command(['terraform', 'get'], quiet=True):
        return False

    LOGGER_CLI.info('Planning infrastructure')
    if not run_command(tf_command):
        return False

    if not continue_prompt():
        sys.exit(1)

    if action == 'destroy':
        LOGGER_CLI.info('Destroying infrastructure')
        tf_command[tf_action_index] = action
        tf_command.remove('-destroy')
        tf_command.append('-force')

    elif action:
        tf_command[tf_action_index] = action

    else:
        LOGGER_CLI.info('Creating infrastructure')
        tf_command[tf_action_index] = 'apply'
        tf_command.append('-refresh=false')

    if not run_command(tf_command):
        return False

    return True
Example #6
    def _resolve_third_party(self, temp_package_path):
        """Install all third-party packages into the deployment package folder

        Args:
            temp_package_path (str): Full path to temp package path

        Returns:
            bool: False if the pip command failed to install requirements, True otherwise
        """
        third_party_libs = self.config['lambda'][self.config_key]['third_party_libraries']
        # Return a default of True here if no libraries to install
        if not third_party_libs:
            LOGGER_CLI.info('No third-party libraries to install.')
            return True

        LOGGER_CLI.info(
            'Installing third-party libraries: %s',
            ', '.join(third_party_libs))
        pip_command = ['pip', 'install']
        pip_command.extend(third_party_libs)
        pip_command.extend(['--upgrade', '--target', temp_package_path])

        # Return True if the pip command is successfully run
        return run_command(pip_command, cwd=temp_package_path, quiet=True)
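Sketch of the resulting pip invocation for an assumed configuration listing two libraries; the library names and target path are examples only:

# With third_party_libraries = ['requests', 'boxsdk'] and a temp path of
# '/tmp/stream_alert_package', the helper above runs roughly:
pip_command = ['pip', 'install', 'requests', 'boxsdk',
               '--upgrade', '--target', '/tmp/stream_alert_package']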
Example #7
    def _resolve_third_party(self, temp_package_path):
        """Install all third-party packages into the deployment package folder

        Args:
            temp_package_path (str): Full path to temp package path

        Returns:
            bool: False if the pip command failed to install requirements, True otherwise
        """
        third_party_libs = self.config['lambda'][
            self.config_key]['third_party_libraries']
        if third_party_libs:
            LOGGER_CLI.info('Installing third-party libraries: %s',
                            ', '.join(third_party_libs))
            pip_command = ['install']
            pip_command.extend(third_party_libs)
            pip_command.extend(['--upgrade', '--target', temp_package_path])
            # Return True if the pip result code is 0
            return pip.main(pip_command) == 0
        else:
            LOGGER_CLI.info('No third-party libraries to install.')

        # Return a default of True here if pip is not called
        return True
def generate_flow_logs(cluster_name, cluster_dict, config):
    """Add the VPC Flow Logs module to the Terraform cluster dict.

    Args:
        cluster_name (str): The name of the currently generating cluster
        cluster_dict (defaultdict): The dict containing all Terraform config for
            a given cluster.
        config (dict): The loaded config from the 'conf/' directory

    Returns:
        bool: Result of applying the flow_logs module
    """
    modules = config['clusters'][cluster_name]['modules']
    flow_log_group_name_default = '{}_{}_streamalert_flow_logs'.format(
        config['global']['account']['prefix'], cluster_name)
    flow_log_group_name = modules['flow_logs'].get(
        'log_group_name', flow_log_group_name_default)

    if modules['flow_logs']['enabled']:
        cluster_dict['module']['flow_logs_{}'.format(cluster_name)] = {
            'source': 'modules/tf_stream_alert_flow_logs',
            'destination_stream_arn': '${{module.kinesis_{}.arn}}'.format(cluster_name),
            'flow_log_group_name': flow_log_group_name
        }
        for flow_log_input in ('vpcs', 'subnets', 'enis'):
            input_data = modules['flow_logs'].get(flow_log_input)
            if input_data:
                cluster_dict['module']['flow_logs_{}'.format(
                    cluster_name)][flow_log_input] = input_data
        return True
    else:
        LOGGER_CLI.info('Flow logs disabled, nothing to do')
        return False
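A minimal sketch, assuming a cluster named 'prod', of the flow_logs settings this function reads and the Terraform module entry it would emit (the empty 'enis' list is skipped because only truthy inputs are copied over):

# Hypothetical cluster config input ...
flow_logs_settings = {
    'enabled': True,
    'log_group_name': 'acme_prod_streamalert_flow_logs',
    'vpcs': ['vpc-1234abcd'],
    'subnets': ['subnet-5678efgh'],
    'enis': []
}
# ... and the module entry generate_flow_logs() would produce for it
expected_module = {
    'source': 'modules/tf_stream_alert_flow_logs',
    'destination_stream_arn': '${module.kinesis_prod.arn}',
    'flow_log_group_name': 'acme_prod_streamalert_flow_logs',
    'vpcs': ['vpc-1234abcd'],
    'subnets': ['subnet-5678efgh']
}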
Example #9
def _terraform_clean(config):
    """Remove leftover Terraform statefiles and main/cluster files

    Args:
        config (CLIConfig): Loaded StreamAlert CLI
    """
    LOGGER_CLI.info('Cleaning Terraform files')

    cleanup_files = [
        '{}.tf.json'.format(cluster) for cluster in config.clusters()
    ]
    cleanup_files.extend([
        'athena.tf.json', 'main.tf.json', 'terraform.tfstate',
        'terraform.tfstate.backup'
    ])
    for tf_file in cleanup_files:
        file_to_remove = 'terraform/{}'.format(tf_file)
        if not os.path.isfile(file_to_remove):
            continue
        os.remove(file_to_remove)

    # Finally, delete the Terraform directory
    if os.path.isdir('terraform/.terraform/'):
        shutil.rmtree('terraform/.terraform/')
Example #10
def create_database(athena_client):
    """Create the 'streamalert' Athena database

    Args:
        athena_client (boto3.client): Instantiated CLI AthenaClient
    """
    if athena_client.check_database_exists():
        LOGGER_CLI.info(
            'The \'streamalert\' database already exists, nothing to do')
        return

    create_db_success, create_db_result = athena_client.run_athena_query(
        query='CREATE DATABASE streamalert')

    if create_db_success and create_db_result['ResultSet'].get('Rows'):
        LOGGER_CLI.info('streamalert database successfully created!')
        LOGGER_CLI.info('results: %s', create_db_result['ResultSet']['Rows'])
Example #11
    def add_app_integration(self, app_info):
        """Add a configuration for a new streamalert app integration function

        Args:
            app_info (dict): The necessary values needed to begin configuring
                a new app integration
        """
        exists, prompt_for_auth, overwrite = False, True, False
        app = StreamAlertApp.get_app(app_info, False)

        # Check to see if there is an existing configuration for this app integration
        cluster_config = self.config['clusters'][app_info['cluster']]
        if app_info['app_name'] in cluster_config['modules'].get(
                'stream_alert_apps', {}):
            prompt = (
                'An app with the name \'{}\' is already configured for cluster '
                '\'{}\'. Would you like to update the existing app\'s configuration'
                '?'.format(app_info['app_name'], app_info['cluster']))

            exists = True

            # Return if the user is not deliberately updating an existing config
            if not continue_prompt(message=prompt):
                return

            prompt = (
                'Would you also like to update the authentication information for '
                'app integration with name \'{}\'?'.format(
                    app_info['app_name']))

            # If this is true, we shouldn't prompt again to warn about overwriting
            prompt_for_auth = overwrite = continue_prompt(message=prompt)

        if prompt_for_auth and not save_app_auth_info(app, app_info,
                                                      overwrite):
            return

        apps_config = cluster_config['modules'].get('stream_alert_apps', {})
        local_config_keys = {'interval', 'timeout', 'memory'}
        if not exists:
            # Save a default log level as info to the config
            app_info['log_level'] = 'info'
            app_info['current_version'] = '$LATEST'
            local_config_keys.update({'log_level', 'current_version', 'type'})

            apps_config[app_info['app_name']] = {
                key: app_info[key]
                for key in local_config_keys
            }
        else:
            apps_config[app_info['app_name']].update(
                {key: app_info[key]
                 for key in local_config_keys})

        cluster_config['modules']['stream_alert_apps'] = apps_config

        # Add this service to the sources for this app integration
        # The `stream_alert_app` key is deliberately singular here
        app_sources = self.config['sources'].get('stream_alert_app', {})
        app_sources[app_info['function_name']] = {'logs': [app.service()]}
        self.config['sources']['stream_alert_app'] = app_sources

        LOGGER_CLI.info(
            'Successfully added \'%s\' app integration to \'conf/clusters/%s.json\' '
            'for service \'%s\'.', app_info['app_name'], app_info['cluster'],
            app_info['type'])

        self.write()
def athena_handler(options):
    """Handle Athena operations"""
    athena_client = StreamAlertAthenaClient(
        CONFIG, results_key_prefix='stream_alert_cli')

    if options.subcommand == 'init':
        CONFIG.generate_athena()

    elif options.subcommand == 'enable':
        CONFIG.set_athena_lambda_enable()

    elif options.subcommand == 'create-db':
        if athena_client.check_database_exists():
            LOGGER_CLI.info(
                'The \'streamalert\' database already exists, nothing to do')
            return

        create_db_success, create_db_result = athena_client.run_athena_query(
            query='CREATE DATABASE streamalert')

        if create_db_success and create_db_result['ResultSet'].get('Rows'):
            LOGGER_CLI.info('streamalert database successfully created!')
            LOGGER_CLI.info('results: %s',
                            create_db_result['ResultSet']['Rows'])

    elif options.subcommand == 'create-table':
        if not options.bucket:
            LOGGER_CLI.error('Missing command line argument --bucket')
            return

        if not options.refresh_type:
            LOGGER_CLI.error('Missing command line argument --refresh_type')
            return

        if options.type == 'data':
            if not options.table_name:
                LOGGER_CLI.error('Missing command line argument --table_name')
                return

            if options.table_name not in enabled_firehose_logs(CONFIG):
                LOGGER_CLI.error(
                    'Table name %s missing from configuration or '
                    'is not enabled.', options.table_name)
                return

            if athena_client.check_table_exists(options.table_name):
                LOGGER_CLI.info('The \'%s\' table already exists.',
                                options.table_name)
                return

            log_info = CONFIG['logs'][options.table_name.replace('_', ':', 1)]
            schema = dict(log_info['schema'])
            schema_statement = ''

            sanitized_schema = StreamAlert.sanitize_keys(schema)

            athena_schema = {}
            schema_type_mapping = {
                'string': 'string',
                'integer': 'int',
                'boolean': 'boolean',
                'float': 'decimal',
                dict: 'map<string, string>',
                list: 'array<string>'
            }

            def add_to_athena_schema(schema, root_key=''):
                """Helper function to add sanitized schemas to the Athena table schema"""
                # Setup the root_key dict
                if root_key and not athena_schema.get(root_key):
                    athena_schema[root_key] = {}

                for key_name, key_type in schema.iteritems():
                    # When using special characters in the beginning or end
                    # of a column name, they have to be wrapped in backticks
                    key_name = '`{}`'.format(key_name)

                    special_key = None
                    # Transform the {} or [] into hashable types
                    if key_type == {}:
                        special_key = dict
                    elif key_type == []:
                        special_key = list
                    # Cast nested dict as a string for now
                    # TODO(jacknagz): support recursive schemas
                    elif isinstance(key_type, dict):
                        special_key = 'string'

                    # Account for envelope keys
                    if root_key:
                        if special_key is not None:
                            athena_schema[root_key][
                                key_name] = schema_type_mapping[special_key]
                        else:
                            athena_schema[root_key][
                                key_name] = schema_type_mapping[key_type]
                    else:
                        if special_key is not None:
                            athena_schema[key_name] = schema_type_mapping[
                                special_key]
                        else:
                            athena_schema[key_name] = schema_type_mapping[
                                key_type]

            add_to_athena_schema(sanitized_schema)

            # Support envelope keys
            configuration_options = log_info.get('configuration')
            if configuration_options:
                envelope_keys = configuration_options.get('envelope_keys')
                if envelope_keys:
                    sanitized_envelope_keys = StreamAlert.sanitize_keys(
                        envelope_keys)
                    # Note: this key is wrapped in backticks to be Hive compliant
                    add_to_athena_schema(sanitized_envelope_keys,
                                         '`streamalert:envelope_keys`')

            for key_name, key_type in athena_schema.iteritems():
                # Account for nested structs
                if isinstance(key_type, dict):
                    struct_schema = ''.join([
                        '{0}:{1},'.format(sub_key, sub_type)
                        for sub_key, sub_type in key_type.iteritems()
                    ])
                    nested_schema_statement = '{0} struct<{1}>, '.format(
                        key_name,
                        # Use the minus index to remove the last comma
                        struct_schema[:-1])
                    schema_statement += nested_schema_statement
                else:
                    schema_statement += '{0} {1},'.format(key_name, key_type)

            query = (
                'CREATE EXTERNAL TABLE {table_name} ({schema}) '
                'PARTITIONED BY (dt string) '
                'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\' '
                'LOCATION \'s3://{bucket}/{table_name}/\''.format(
                    table_name=options.table_name,
                    # Use the minus index to remove the last comma
                    schema=schema_statement[:-1],
                    bucket=options.bucket))

        elif options.type == 'alerts':
            if athena_client.check_table_exists(options.type):
                LOGGER_CLI.info('The \'alerts\' table already exists.')
                return

            query = ('CREATE EXTERNAL TABLE alerts ('
                     'log_source string,'
                     'log_type string,'
                     'outputs array<string>,'
                     'record string,'
                     'rule_description string,'
                     'rule_name string,'
                     'source_entity string,'
                     'source_service string)'
                     'PARTITIONED BY (dt string)'
                     'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\''
                     'LOCATION \'s3://{bucket}/alerts/\''.format(
                         bucket=options.bucket))

        if query:
            create_table_success, _ = athena_client.run_athena_query(
                query=query, database='streamalert')

            if create_table_success:
                CONFIG['lambda']['athena_partition_refresh_config'] \
                      ['refresh_type'][options.refresh_type][options.bucket] = options.type
                CONFIG.write()
                table_name = options.type if options.type == 'alerts' else options.table_name
                LOGGER_CLI.info('The %s table was successfully created!',
                                table_name)
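Worked example of the schema type mapping above for a small, hypothetical log schema; the resulting Athena column list is shown in comments (dict ordering may vary):

sample_schema = {
    'username': 'string',
    'count': 'integer',
    'is_admin': 'boolean',
    'tags': [],      # empty list maps to array<string>
    'context': {}    # empty dict maps to map<string, string>
}
# add_to_athena_schema(sample_schema) would populate athena_schema roughly as:
# {'`username`': 'string', '`count`': 'int', '`is_admin`': 'boolean',
#  '`tags`': 'array<string>', '`context`': 'map<string, string>'}
# and the rendering loop above turns that into the column list:
# `username` string,`count` int,`is_admin` boolean,`tags` array<string>,`context` map<string, string>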
Example #13
def athena_handler(options):
    """Handle Athena operations"""
    athena_client = StreamAlertAthenaClient(
        CONFIG, results_key_prefix='stream_alert_cli')

    if options.subcommand == 'init':
        CONFIG.generate_athena()

    elif options.subcommand == 'enable':
        CONFIG.set_athena_lambda_enable()

    elif options.subcommand == 'create-db':
        if athena_client.check_database_exists():
            LOGGER_CLI.info(
                'The \'streamalert\' database already exists, nothing to do')
            return

        create_db_success, create_db_result = athena_client.run_athena_query(
            query='CREATE DATABASE streamalert')

        if create_db_success and create_db_result['ResultSet'].get('Rows'):
            LOGGER_CLI.info('streamalert database successfully created!')
            LOGGER_CLI.info('results: %s',
                            create_db_result['ResultSet']['Rows'])

    elif options.subcommand == 'create-table':
        if not options.bucket:
            LOGGER_CLI.error('Missing command line argument --bucket')
            return

        if not options.refresh_type:
            LOGGER_CLI.error('Missing command line argument --refresh_type')
            return

        if options.type == 'data':
            if not options.table_name:
                LOGGER_CLI.error('Missing command line argument --table_name')
                return

            if options.table_name not in enabled_firehose_logs(CONFIG):
                LOGGER_CLI.error(
                    'Table name %s missing from configuration or '
                    'is not enabled.', options.table_name)
                return

            if athena_client.check_table_exists(options.table_name):
                LOGGER_CLI.info('The \'%s\' table already exists.',
                                options.table_name)
                return

            schema = CONFIG['logs'][options.table_name.replace('_',
                                                               ':')]['schema']
            sanitized_schema = StreamAlert.sanitize_keys(schema)

            athena_schema = {}
            schema_type_mapping = {
                'string': 'string',
                'integer': 'int',
                'boolean': 'boolean',
                'float': 'decimal',
                dict: 'map<string, string>',
                list: 'array<string>'
            }

            for key_name, key_type in sanitized_schema.iteritems():
                # Transform the {} or [] into hashable types
                if key_type == {}:
                    key_type = dict
                elif key_type == []:
                    key_type = list

                athena_schema[key_name] = schema_type_mapping[key_type]

            schema_statement = ''.join([
                '{0} {1},'.format(key_name, key_type)
                for key_name, key_type in athena_schema.iteritems()
            ])[:-1]
            query = ('CREATE EXTERNAL TABLE {table_name} ({schema})'
                     'PARTITIONED BY (dt string)'
                     'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\''
                     'LOCATION \'s3://{bucket}/{table_name}/\''.format(
                         table_name=options.table_name,
                         schema=schema_statement,
                         bucket=options.bucket))

        elif options.type == 'alerts':
            if athena_client.check_table_exists(options.type):
                LOGGER_CLI.info('The \'alerts\' table already exists.')
                return

            query = ('CREATE EXTERNAL TABLE alerts ('
                     'log_source string,'
                     'log_type string,'
                     'outputs array<string>,'
                     'record string,'
                     'rule_description string,'
                     'rule_name string,'
                     'source_entity string,'
                     'source_service string)'
                     'PARTITIONED BY (dt string)'
                     'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\''
                     'LOCATION \'s3://{bucket}/alerts/\''.format(
                         bucket=options.bucket))

        if query:
            create_table_success, _ = athena_client.run_athena_query(
                query=query, database='streamalert')

            if create_table_success:
                CONFIG['lambda']['athena_partition_refresh_config'] \
                      ['refresh_type'][options.refresh_type][options.bucket] = options.type
                CONFIG.write()
                LOGGER_CLI.info('The %s table was successfully created!',
                                options.type)
Example #14
    def _publish_helper(self, **kwargs):
        """Handle clustered or single Lambda function publishing

        Keyword Arguments:
            cluster (str): The cluster to deploy to, this is optional

        Returns:
            bool: Result of the function publishes
        """
        cluster = kwargs.get('cluster')
        # Clustered Lambda functions have a different naming pattern
        if cluster:
            region = self.config['clusters'][cluster]['region']
            function_name = '{}_{}_streamalert_{}'.format(
                self.config['global']['account']['prefix'], cluster,
                self.package.package_name)
        else:
            region = self.config['global']['account']['region']
            function_name = '{}_streamalert_{}'.format(
                self.config['global']['account']['prefix'],
                self.package.package_name)

        # Configure the Lambda client
        client = boto3.client('lambda', region_name=region)
        code_sha_256 = self.config['lambda'][
            self.package.config_key]['source_current_hash']

        # Publish the function(s)
        # TODO: move the extra logic into the LambdaPackage subclasses instead of this
        if self.package.package_name == 'stream_alert_app':
            if 'stream_alert_apps' not in self.config['clusters'][cluster]['modules']:
                return True  # nothing to publish for this cluster

            for app_name, app_info in self.config['clusters'][cluster]['modules'] \
                ['stream_alert_apps'].iteritems():
                # Name follows format: '<prefix>_<cluster>_<service>_<app_name>_app'
                function_name = '_'.join([
                    self.config['global']['account']['prefix'], cluster,
                    app_info['type'], app_name, 'app'
                ])
                new_version = self._publish(client, function_name,
                                            code_sha_256)
                if not new_version:
                    continue

                LOGGER_CLI.info('Published version %s for %s:%s', new_version,
                                cluster, function_name)

                app_info['current_version'] = new_version

        else:

            new_version = self._publish(client, function_name, code_sha_256)
            if not new_version:
                return False

            # Update the config
            if cluster:
                LOGGER_CLI.info('Published version %s for %s:%s', new_version,
                                cluster, function_name)
                self.config['clusters'][cluster]['modules']['stream_alert'] \
                    [self.package.package_name]['current_version'] = new_version
            else:
                LOGGER_CLI.info('Published version %s for %s', new_version,
                                function_name)
                self.config['lambda'][
                    self.package.config_key]['current_version'] = new_version

        self.config.write()

        return True
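The function naming patterns used above, illustrated for a hypothetical prefix, cluster, and app (package and service names are examples):

prefix, cluster = 'acme', 'prod'
clustered_function = '{}_{}_streamalert_{}'.format(prefix, cluster, 'rule_processor')
# -> 'acme_prod_streamalert_rule_processor'
global_function = '{}_streamalert_{}'.format(prefix, 'athena_partition_refresh')
# -> 'acme_streamalert_athena_partition_refresh'
app_function = '_'.join([prefix, cluster, 'duo_auth', 'production_collector', 'app'])
# -> 'acme_prod_duo_auth_production_collector_app'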
Example #15
    def add_metric_alarm(self, alarm_info):
        """Add a metric alarm that corresponds to a predefined metrics

        Args:
            alarm_info (dict): All the necessary values needed to add a CloudWatch
                metric alarm
        """
        # Check to see if an alarm with this name already exists
        if self._alarm_exists(alarm_info['alarm_name']):
            return

        # Get the current metrics for each function
        current_metrics = metrics.MetricLogger.get_available_metrics()

        # Extract the function name this metric is associated with
        metric_function = {metric: function for function in current_metrics
                           for metric in current_metrics[function]}[alarm_info['metric_name']]

        # Do not continue if the user is trying to apply a metric alarm for an athena
        # metric to a specific cluster (since the athena function operates on all clusters)
        if (alarm_info['metric_target'] != 'aggregate' and
                metric_function == metrics.ATHENA_PARTITION_REFRESH_NAME):
            LOGGER_CLI.error('Metrics for the athena function can only be applied '
                             'to an aggregate metric target, not on a per-cluster basis.')
            return

        # If the metric is related to either the rule processor or alert processor, we should
        # check to see if any cluster has metrics enabled for that function before continuing
        if (metric_function in {metrics.ALERT_PROCESSOR_NAME, metrics.RULE_PROCESSOR_NAME} and
                not any(self.config['clusters'][cluster]['modules']['stream_alert']
                        [metric_function].get('enable_metrics') for cluster in
                        self.config['clusters'])):
            prompt = ('Metrics are not currently enabled for the \'{}\' function '
                      'within any cluster. Creating an alarm will have no effect '
                      'until metrics are enabled for this function in at least one '
                      'cluster. Would you still like to continue?'.format(metric_function))
            if not continue_prompt(message=prompt):
                return

        elif metric_function == metrics.ATHENA_PARTITION_REFRESH_NAME:
            # If the user is attempting to add a metric for athena, make sure the athena
            # function is initialized first
            if 'athena_partition_refresh_config' not in self.config['lambda']:
                LOGGER_CLI.error('No configuration found for Athena Partition Refresh. '
                                 'Please run: `$ python manage.py athena init` first.')
                return

            # If the athena function is initialized, but metrics are not enabled, ask
            # the user if they would like to enable them now
            if not self.config['lambda']['athena_partition_refresh_config'].get('enable_metrics'):
                prompt = ('Metrics are not currently enabled for the \'athena\' function. '
                          'Would you like to enable metrics for athena?')

                if continue_prompt(message=prompt):
                    self.toggle_metrics(True, None, [metric_function])

                elif not continue_prompt(message='Would you still like to add this alarm '
                                                 'even though metrics are disabled?'):
                    return

        # Add metric alarms for the aggregate metrics - these are added to the global config
        if (alarm_info['metric_target'] == 'aggregate' or
                metric_function == metrics.ATHENA_PARTITION_REFRESH_NAME):
            global_config = self.config['global']['infrastructure']['monitoring']

            metric_alarms = global_config.get('metric_alarms', {})
            if not metric_alarms:
                global_config['metric_alarms'] = {}

            metric_alarms = global_config['metric_alarms'].get(metric_function, {})
            if not metric_alarms:
                global_config['metric_alarms'][metric_function] = {}

            # Format the metric name for the aggregate metric
            alarm_settings = alarm_info.copy()
            alarm_settings['metric_name'] = '{}-{}'.format(metrics.FUNC_PREFIXES[metric_function],
                                                           alarm_info['metric_name'])

            new_alarms = self._add_metric_alarm_config(alarm_settings, metric_alarms)
            if new_alarms is not False:
                global_config['metric_alarms'][metric_function] = new_alarms
                LOGGER_CLI.info('Successfully added \'%s\' metric alarm to '
                                '\'conf/global.json\'.', alarm_settings['alarm_name'])

        else:
            # Add metric alarms on a per-cluster basis - these are added to the cluster config
            self._add_metric_alarm_per_cluster(alarm_info, metric_function)

        # Save all of the alarm updates to disk
        self.write()
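To clarify the metric-to-function lookup near the top of this example, here is how the dictionary inversion behaves for a hypothetical get_available_metrics() result (metric names are illustrative):

current_metrics = {
    'rule_processor': {'FailedParses': None, 'TotalRecords': None},
    'alert_processor': {'FailedDispatches': None}
}
metric_function = {metric: function
                   for function in current_metrics
                   for metric in current_metrics[function]}['FailedParses']
# metric_function == 'rule_processor'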
Example #16
    def add_app_integration(self, app_info):
        """Add a configuration for a new streamalert app integration function

        Args:
            app_info (dict): The necessary values needed to begin configuring
                a new app integration
        """
        exists, prompt_for_auth, overwrite = False, True, False
        app = StreamAlertApp.get_app(app_info, False)

        cluster_name = app_info['cluster']
        app_name = app_info['app_name']
        func_name = app_info['function_name']

        # Check to see if there is an existing configuration for this app integration
        cluster_config = self.config['clusters'][cluster_name]

        if func_name in cluster_config['modules'].get('stream_alert_apps', {}):
            prompt = (
                'An app with the name \'{}\' is already configured for cluster '
                '\'{}\'. Would you like to update the existing app\'s configuration'
                '?'.format(app_name, cluster_name))

            exists = True

            # Return if the user is not deliberately updating an existing config
            if not continue_prompt(message=prompt):
                return

            prompt = (
                'Would you also like to update the authentication information for '
                'app integration with name \'{}\'?'.format(app_name))

            # If this is true, we shouldn't prompt again to warn about overwriting
            prompt_for_auth = overwrite = continue_prompt(message=prompt)

        if prompt_for_auth and not save_app_auth_info(app, app_info,
                                                      overwrite):
            return

        apps_config = cluster_config['modules'].get('stream_alert_apps', {})
        if not exists:
            # Save a default app settings to the config for new apps
            new_app_config = {
                'app_name': app_info['app_name'],
                'concurrency_limit': 2,
                'log_level': 'info',
                'log_retention_days': 14,
                'memory': app_info['memory'],
                'metric_alarms': {
                    'errors': {
                        'enabled': True,
                        'evaluation_periods': 1,
                        'period_secs': 120
                    }
                },
                'schedule_expression': app_info['schedule_expression'],
                'timeout': app_info['timeout'],
                'type': app_info['type']
            }
            apps_config[func_name] = new_app_config
        else:

            # Allow for updating certain attributes for the app without overwriting
            # current parts of the configuration
            updated_app_config = {
                'memory': app_info['memory'],
                'schedule_expression': app_info['schedule_expression'],
                'timeout': app_info['timeout']
            }
            apps_config[func_name].update(updated_app_config)

        cluster_config['modules']['stream_alert_apps'] = apps_config

        # Add this service to the sources for this app integration
        # The `stream_alert_app` key is deliberately singular here
        app_sources = self.config['sources'].get('stream_alert_app', {})
        app_sources[app_info['function_name']] = {'logs': [app.service()]}
        self.config['sources']['stream_alert_app'] = app_sources

        LOGGER_CLI.info(
            'Successfully added \'%s\' app integration to \'conf/clusters/%s.json\' '
            'for service \'%s\'.', app_info['app_name'], app_info['cluster'],
            app_info['type'])

        self.write()
Example #17
def format_lambda_test_record(test_record):
    """Create a properly formatted Kinesis, S3, or SNS record.

    Supports a dictionary or string based data record.  Reads in
    event templates from the tests/integration/templates folder.

    Args:
        test_record (dict): Test record metadata dict with the following structure:
            data - string or dict of the raw data
            description - a string describing the test that is being performed
            trigger - bool of if the record should produce an alert
            source - which stream/s3 bucket originated the data
            service - which aws service originated the data
            compress (optional) - if the payload needs to be gzip compressed or not

    Returns:
        dict: in the format of the specific service
    """
    service = test_record['service']
    source = test_record['source']
    compress = test_record.get('compress')

    data_type = type(test_record['data'])
    if data_type == dict:
        data = json.dumps(test_record['data'])
    elif data_type in (unicode, str):
        data = test_record['data']
    else:
        LOGGER_CLI.error('Invalid data type: %s', data_type)
        return

    # Get the template file for this particular service
    record_template = _get_record_template(service)
    if not record_template:
        return

    if service == 's3':
        # Set the S3 object key to a random value for testing
        test_record['key'] = ('{:032X}'.format(random.randrange(16**32)))
        record_template['s3']['object']['key'] = test_record['key']
        record_template['s3']['object']['size'] = len(data)
        record_template['s3']['bucket']['arn'] = 'arn:aws:s3:::{}'.format(
            source)
        record_template['s3']['bucket']['name'] = source

        # Create the mocked s3 object in the designated bucket with the random key
        put_mock_s3_object(source, test_record['key'], data, 'us-east-1')

    elif service == 'kinesis':
        if compress:
            kinesis_data = base64.b64encode(zlib.compress(data))
        else:
            kinesis_data = base64.b64encode(data)

        record_template['kinesis']['data'] = kinesis_data
        record_template['eventSourceARN'] = (
            'arn:aws:kinesis:us-east-1:111222333:'
            'stream/{}'.format(source))

    elif service == 'sns':
        record_template['Sns']['Message'] = data
        record_template[
            'EventSubscriptionArn'] = 'arn:aws:sns:us-east-1:111222333:{}'.format(
                source)

    elif service == 'stream_alert_app':
        record_template['stream_alert_app'] = source
        record_template['logs'] = [data]

    else:
        LOGGER_CLI.error('Invalid service %s', service)

    return record_template
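A minimal, hypothetical test record and how the kinesis branch above would encode its data:

import base64
import json

test_record = {
    'service': 'kinesis',
    'source': 'acme_prod_stream_alert_kinesis',
    'description': 'example login event',
    'trigger': True,
    'data': {'username': 'alice', 'action': 'login'}
}
data = json.dumps(test_record['data'])
kinesis_data = base64.b64encode(data)  # ends up in record_template['kinesis']['data']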
Example #18
def main():
    """Entry point for the CLI."""
    parser = build_parser()
    options = parser.parse_args()
    cli_runner(options)
    LOGGER_CLI.info('Completed')
def generate_cloudtrail(cluster_name, cluster_dict, config):
    """Add the CloudTrail module to the Terraform cluster dict.

    Args:
        cluster_name (str): The name of the currently generating cluster
        cluster_dict (defaultdict): The dict containing all Terraform config for a given cluster.
        config (dict): The loaded config from the 'conf/' directory

    Returns:
        bool: Result of applying the cloudtrail module
    """
    modules = config['clusters'][cluster_name]['modules']
    cloudtrail_module = 'cloudtrail_{}'.format(cluster_name)

    enabled_legacy = modules['cloudtrail'].get('enabled')

    cloudtrail_enabled = modules['cloudtrail'].get('enable_logging', True)
    kinesis_enabled = modules['cloudtrail'].get('enable_kinesis', True)
    send_to_cloudwatch = modules['cloudtrail'].get('send_to_cloudwatch', False)
    exclude_home_region = modules['cloudtrail'].get('exclude_home_region_events', False)

    account_ids = list(
        set([config['global']['account']['aws_account_id']] + modules['cloudtrail'].get(
            'cross_account_ids', [])))

    # Allow for backwards compatibility
    if enabled_legacy:
        del config['clusters'][cluster_name]['modules']['cloudtrail']['enabled']
        config['clusters'][cluster_name]['modules']['cloudtrail']['enable_logging'] = True
        config['clusters'][cluster_name]['modules']['cloudtrail']['enable_kinesis'] = True
        LOGGER_CLI.info('Converting legacy CloudTrail config')
        config.write()
        kinesis_enabled = True
        cloudtrail_enabled = True

    existing_trail = modules['cloudtrail'].get('existing_trail', False)
    is_global_trail = modules['cloudtrail'].get('is_global_trail', True)
    region = config['global']['account']['region']

    event_pattern_default = {'account': [config['global']['account']['aws_account_id']]}
    event_pattern = modules['cloudtrail'].get('event_pattern', event_pattern_default)

    # From here: http://amzn.to/2zF7CS0
    valid_event_pattern_keys = {
        'version', 'id', 'detail-type', 'source', 'account', 'time', 'region', 'resources', 'detail'
    }
    if not set(event_pattern.keys()).issubset(valid_event_pattern_keys):
        LOGGER_CLI.error('Config Error: Invalid CloudWatch Event Pattern!')
        return False

    module_info = {
        'source': 'modules/tf_stream_alert_cloudtrail',
        'account_ids': account_ids,
        'cluster': cluster_name,
        'prefix': config['global']['account']['prefix'],
        'enable_logging': cloudtrail_enabled,
        'enable_kinesis': kinesis_enabled,
        's3_logging_bucket': config['global']['s3_access_logging']['logging_bucket'],
        'existing_trail': existing_trail,
        'send_to_cloudwatch': send_to_cloudwatch,
        'exclude_home_region_events': exclude_home_region,
        'region': region,
        'is_global_trail': is_global_trail
    }

    # use the kinesis output from the kinesis streams module
    if kinesis_enabled:
        module_info['kinesis_arn'] = '${{module.kinesis_{}.arn}}'.format(cluster_name)
        module_info['event_pattern'] = json.dumps(event_pattern)

    if send_to_cloudwatch:
        destination_arn = modules['cloudtrail'].get(
            'cloudwatch_destination_arn',
            '${{module.cloudwatch_{}_{}.cloudwatch_destination_arn}}'.format(cluster_name,
                                                                             region)
        )
        module_info['cloudwatch_destination_arn'] = destination_arn

    cluster_dict['module'][cloudtrail_module] = module_info

    return True
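An assumed conf/clusters/<cluster>.json 'cloudtrail' block exercising the options read above; the values are examples only:

cloudtrail_settings = {
    'enable_logging': True,
    'enable_kinesis': True,
    'send_to_cloudwatch': False,
    'exclude_home_region_events': False,
    'cross_account_ids': ['123456789012'],
    'existing_trail': False,
    'is_global_trail': True,
    'event_pattern': {
        'source': ['aws.ec2'],
        'detail-type': ['EC2 Instance State-change Notification']
    }
}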
Example #20
def main():
    """Entry point for the CLI."""
    parser = build_parser()
    options = parser.parse_args()
    cli_runner(options)
    LOGGER_CLI.info('Completed')
Example #21
def _terraform_init(config):
    """Initialize infrastructure using Terraform

    Args:
        config (CLIConfig): Loaded StreamAlert CLI
    """
    LOGGER_CLI.info('Initializing StreamAlert')

    # generate init Terraform files
    if not terraform_generate(config=config, init=True):
        return

    LOGGER_CLI.info('Initializing Terraform')
    if not run_command(['terraform', 'init']):
        sys.exit(1)

    # build init infrastructure
    LOGGER_CLI.info('Building Initial Infrastructure')
    init_targets = [
        'aws_s3_bucket.lambda_source', 'aws_s3_bucket.logging_bucket',
        'aws_s3_bucket.stream_alert_secrets',
        'aws_s3_bucket.terraform_remote_state', 'aws_s3_bucket.streamalerts',
        'aws_kms_key.stream_alert_secrets',
        'aws_kms_alias.stream_alert_secrets'
    ]
    if not tf_runner(targets=init_targets):
        LOGGER_CLI.error('An error occurred while running StreamAlert init')
        sys.exit(1)

    # generate the main.tf with remote state enabled
    LOGGER_CLI.info('Configuring Terraform Remote State')
    if not terraform_generate(config=config):
        return

    if not run_command(['terraform', 'init']):
        return

    # Use a named tuple to match the 'processor' attribute in the argparse options
    deploy_opts = namedtuple('DeployOptions', ['processor', 'clusters'])

    LOGGER_CLI.info('Deploying Lambda Functions')

    deploy(deploy_opts(['rule', 'alert', 'alert_merger', 'athena'], []),
           config)

    # we need to manually create the streamalerts table since terraform does not support this
    # See: https://github.com/terraform-providers/terraform-provider-aws/issues/1486
    alerts_bucket = '{}.streamalerts'.format(
        config['global']['account']['prefix'])
    create_table('alerts', alerts_bucket, config)

    LOGGER_CLI.info('Building Remainder Infrastructure')
    tf_runner(refresh=False)
Example #22
def rebuild_partitions(table, bucket, config):
    """Rebuild an Athena table's partitions

    Steps:
      - Get the list of current partitions
      - Destroy existing table
      - Re-create tables
      - Re-create partitions

    Args:
        table (str): The name of the table being rebuilt
        bucket (str): The s3 bucket to be used as the location for Athena data
        config (CLIConfig): Loaded StreamAlert CLI
    """
    athena_client = StreamAlertAthenaClient(
        config, results_key_prefix='stream_alert_cli')

    sa_firehose = StreamAlertFirehose(
        config['global']['account']['region'],
        config['global']['infrastructure']['firehose'], config['logs'])

    sanitized_table_name = sa_firehose.firehose_log_name(table)

    # Get the current set of partitions
    partition_success, partitions = athena_client.run_athena_query(
        query='SHOW PARTITIONS {}'.format(sanitized_table_name),
        database=athena_client.sa_database)
    if not partition_success:
        LOGGER_CLI.error('An error occurred when loading partitions for %s',
                         sanitized_table_name)
        return

    unique_partitions = athena_helpers.unique_values_from_query(partitions)

    if not unique_partitions:
        LOGGER_CLI.info('No partitions to rebuild for %s, nothing to do',
                        sanitized_table_name)
        return

    # Drop the table
    LOGGER_CLI.info('Dropping table %s', sanitized_table_name)
    drop_success, _ = athena_client.run_athena_query(
        query='DROP TABLE {}'.format(sanitized_table_name),
        database=athena_client.sa_database)
    if not drop_success:
        LOGGER_CLI.error('An error occurred when dropping the %s table',
                         sanitized_table_name)
        return

    LOGGER_CLI.info('Dropped table %s', sanitized_table_name)

    LOGGER_CLI.info('Creating table %s', sanitized_table_name)

    # Re-create the table with previous partitions
    create_table(table, bucket, config)

    new_partitions_statement = athena_helpers.partition_statement(
        unique_partitions, bucket, sanitized_table_name)

    # Make sure our new alter table statement is within the query API limits
    if len(new_partitions_statement) > MAX_QUERY_LENGTH:
        LOGGER_CLI.error(
            'Partition statement too large, writing to local file')
        with open('partitions_{}.txt'.format(sanitized_table_name),
                  'w') as partition_file:
            partition_file.write(new_partitions_statement)
        return

    LOGGER_CLI.info('Creating %d new partitions for %s',
                    len(unique_partitions), sanitized_table_name)
    new_part_success, _ = athena_client.run_athena_query(
        query=new_partitions_statement, database=athena_client.sa_database)
    if not new_part_success:
        LOGGER_CLI.error('Error re-creating new partitions for %s',
                         sanitized_table_name)
        return

    LOGGER_CLI.info('Successfully rebuilt partitions for %s',
                    sanitized_table_name)
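A hedged sketch of the kind of ALTER TABLE statement built from the recovered partitions (table, bucket, and partition values are hypothetical); the exact output of athena_helpers.partition_statement may differ:

new_partitions_statement = (
    "ALTER TABLE cloudwatch_events ADD IF NOT EXISTS "
    "PARTITION (dt = '2018-01-01-01') "
    "LOCATION 's3://acme.streamalert.data/cloudwatch_events/2018/01/01/01' "
    "PARTITION (dt = '2018-01-01-02') "
    "LOCATION 's3://acme.streamalert.data/cloudwatch_events/2018/01/01/02'"
)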
Example #23
def terraform_generate(config, init=False):
    """Generate all Terraform plans for the configured clusters.

    Args:
        config (dict): The loaded config from the 'conf/' directory
        init (bool): Indicates if main.tf.json is generated for `terraform init`

    Returns:
        bool: Result of cluster generating
    """
    cleanup_old_tf_files(config)

    # Setup the main.tf.json file
    LOGGER_CLI.debug('Generating cluster file: main.tf.json')
    with open('terraform/main.tf.json', 'w') as tf_file:
        json.dump(
            generate_main(init=init, config=config),
            tf_file,
            indent=2,
            sort_keys=True
        )

    # Return early during the init process, clusters are not needed yet
    if init:
        return True

    # Setup cluster files
    for cluster in config.clusters():
        if cluster in RESTRICTED_CLUSTER_NAMES:
            raise InvalidClusterName(
                'Rename cluster "main" or "athena" to something else!')

        LOGGER_CLI.debug('Generating cluster file: %s.tf.json', cluster)
        cluster_dict = generate_cluster(cluster_name=cluster, config=config)
        if not cluster_dict:
            LOGGER_CLI.error(
                'An error was generated while creating the %s cluster', cluster)
            return False

        with open('terraform/{}.tf.json'.format(cluster), 'w') as tf_file:
            json.dump(
                cluster_dict,
                tf_file,
                indent=2,
                sort_keys=True
            )

    # Setup Athena if it is enabled
    athena_config = config['lambda'].get('athena_partition_refresh_config')
    if athena_config:
        athena_file = 'terraform/athena.tf.json'
        if athena_config['enabled']:
            athena_generated_config = generate_athena(config=config)
            if athena_generated_config:
                with open(athena_file, 'w') as tf_file:
                    json.dump(
                        athena_generated_config,
                        tf_file,
                        indent=2,
                        sort_keys=True
                    )
        # Remove Athena file if it's disabled
        else:
            if os.path.isfile(athena_file):
                LOGGER_CLI.info('Removing old Athena Terraform file')
                os.remove(athena_file)

    return True
def create_table(athena_client, options, config):
    """Create a 'streamalert' Athena table

    Args:
        athena_client (boto3.client): Instantiated CLI AthenaClient
        options (namedtuple): The parsed args passed from the CLI
        config (CLIConfig): Loaded StreamAlert CLI
    """
    if not options.bucket:
        LOGGER_CLI.error('Missing command line argument --bucket')
        return

    if not options.refresh_type:
        LOGGER_CLI.error('Missing command line argument --refresh_type')
        return

    if options.type == 'data':
        if not options.table_name:
            LOGGER_CLI.error('Missing command line argument --table_name')
            return

        if options.table_name not in terraform_cli_helpers.enabled_firehose_logs(
                config):
            LOGGER_CLI.error(
                'Table name %s missing from configuration or '
                'is not enabled.', options.table_name)
            return

        if athena_client.check_table_exists(options.table_name):
            LOGGER_CLI.info('The \'%s\' table already exists.',
                            options.table_name)
            return

        log_info = config['logs'][options.table_name.replace('_', ':', 1)]
        schema = dict(log_info['schema'])
        schema_statement = ''

        sanitized_schema = StreamAlert.sanitize_keys(schema)
        athena_schema = {}

        _add_to_athena_schema(sanitized_schema, athena_schema)

        # Support envelope keys
        configuration_options = log_info.get('configuration')
        if configuration_options:
            envelope_keys = configuration_options.get('envelope_keys')
            if envelope_keys:
                sanitized_envelope_key_schema = StreamAlert.sanitize_keys(
                    envelope_keys)
                # Note: this key is wrapped in backticks to be Hive compliant
                _add_to_athena_schema(sanitized_envelope_key_schema,
                                      athena_schema,
                                      '`streamalert:envelope_keys`')

        for key_name, key_type in athena_schema.iteritems():
            # Account for nested structs
            if isinstance(key_type, dict):
                struct_schema = ''.join([
                    '{0}:{1},'.format(sub_key, sub_type)
                    for sub_key, sub_type in key_type.iteritems()
                ])
                nested_schema_statement = '{0} struct<{1}>, '.format(
                    key_name,
                    # Use the minus index to remove the last comma
                    struct_schema[:-1])
                schema_statement += nested_schema_statement
            else:
                schema_statement += '{0} {1},'.format(key_name, key_type)

        query = (
            'CREATE EXTERNAL TABLE {table_name} ({schema}) '
            'PARTITIONED BY (dt string) '
            'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\' '
            'WITH SERDEPROPERTIES ( \'ignore.malformed.json\' = \'true\') '
            'LOCATION \'s3://{bucket}/{table_name}/\''.format(
                table_name=options.table_name,
                # Use a negative index to drop the trailing comma
                schema=schema_statement[:-1],
                bucket=options.bucket))

    elif options.type == 'alerts':
        if athena_client.check_table_exists(options.type):
            LOGGER_CLI.info('The \'alerts\' table already exists.')
            return

        query = ('CREATE EXTERNAL TABLE alerts ('
                 'log_source string,'
                 'log_type string,'
                 'outputs array<string>,'
                 'record string,'
                 'rule_description string,'
                 'rule_name string,'
                 'source_entity string,'
                 'source_service string)'
                 'PARTITIONED BY (dt string)'
                 'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\''
                 'LOCATION \'s3://{bucket}/alerts/\''.format(
                     bucket=options.bucket))

    if query:
        create_table_success, _ = athena_client.run_athena_query(
            query=query, database='streamalert')

        if create_table_success:
            # Update the CLI config
            config['lambda']['athena_partition_refresh_config'] \
                  ['refresh_type'][options.refresh_type][options.bucket] = options.type
            config.write()

            table_name = options.type if options.type == 'alerts' else options.table_name
            LOGGER_CLI.info('The %s table was successfully created!',
                            table_name)
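
# A minimal, standalone sketch of how the schema statement above is assembled
# from an Athena schema dict, including a nested struct column; the column
# names and types used in the usage comment are made up for illustration.
def _example_schema_statement(athena_schema):
    """Build the column list portion of a CREATE EXTERNAL TABLE statement"""
    statement = ''
    for key_name, key_type in sorted(athena_schema.items()):
        if isinstance(key_type, dict):
            # Nested keys become a Hive struct<...> column
            struct_schema = ','.join(
                '{0}:{1}'.format(sub_key, sub_type)
                for sub_key, sub_type in sorted(key_type.items()))
            statement += '{0} struct<{1}>,'.format(key_name, struct_schema)
        else:
            statement += '{0} {1},'.format(key_name, key_type)
    return statement[:-1]  # drop the trailing comma

# _example_schema_statement({'host': 'string', 'detail': {'pid': 'bigint'}})
# -> 'detail struct<pid:bigint>,host string'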
Beispiel #25
0
def terraform_handler(options):
    """Handle all Terraform CLI operations"""
    # Verify terraform is installed
    if not terraform_check():
        return
    # Use a named tuple to match the 'processor' attribute in the argparse options
    deploy_opts = namedtuple('DeployOptions', ['processor'])

    # Plan and Apply our streamalert infrastructure
    if options.subcommand == 'build':
        # Generate Terraform files
        if not terraform_generate(config=CONFIG):
            return
        # Target is for terraforming a specific streamalert module.
        # This value is passed as a list
        if options.target:
            targets = ['module.{}_{}'.format(target, cluster)
                       for cluster in CONFIG.clusters()
                       for target in options.target]
            tf_runner(targets=targets)
        else:
            tf_runner()

    # generate terraform files
    elif options.subcommand == 'generate':
        if not terraform_generate(config=CONFIG):
            return

    elif options.subcommand == 'init-backend':
        run_command(['terraform', 'init'])

    # initialize streamalert infrastructure from a blank state
    elif options.subcommand == 'init':
        LOGGER_CLI.info('Initializing StreamAlert')

        # generate init Terraform files
        if not terraform_generate(config=CONFIG, init=True):
            return

        LOGGER_CLI.info('Initializing Terraform')
        if not run_command(['terraform', 'init']):
            sys.exit(1)

        # build init infrastructure
        LOGGER_CLI.info('Building Initial Infrastructure')
        init_targets = [
            'aws_s3_bucket.lambda_source',
            'aws_s3_bucket.logging_bucket',
            'aws_s3_bucket.stream_alert_secrets',
            'aws_s3_bucket.terraform_remote_state',
            'aws_s3_bucket.streamalerts',
            'aws_kms_key.stream_alert_secrets',
            'aws_kms_alias.stream_alert_secrets'
        ]
        if not tf_runner(targets=init_targets):
            LOGGER_CLI.error('An error occurred while running StreamAlert init')
            sys.exit(1)

        # generate the main.tf with remote state enabled
        LOGGER_CLI.info('Configuring Terraform Remote State')
        if not terraform_generate(config=CONFIG):
            return

        if not run_command(['terraform', 'init']):
            return

        LOGGER_CLI.info('Deploying Lambda Functions')
        # deploy both lambda functions
        deploy(deploy_opts('all'))
        # create all remainder infrastructure

        LOGGER_CLI.info('Building Remainder Infrastructure')
        tf_runner()

    elif options.subcommand == 'clean':
        terraform_clean()

    elif options.subcommand == 'destroy':
        if options.target:
            target = options.target
            targets = ['module.{}_{}'.format(target, cluster)
                       for cluster in CONFIG.clusters()]
            tf_runner(targets=targets, action='destroy')
            return

        # Migrate back to local state so Terraform can successfully
        # destroy the S3 bucket used by the backend.
        if not terraform_generate(config=CONFIG, init=True):
            return

        if not run_command(['terraform', 'init']):
            return

        # Destroy all of the infrastructure
        if not tf_runner(action='destroy'):
            return

        # Remove old Terraform files
        terraform_clean()

    # get a quick status on our declared infrastructure
    elif options.subcommand == 'status':
        status()
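
# A minimal sketch of the target expansion used by the 'build' subcommand above:
# every requested module name is crossed with every configured cluster. The
# module and cluster names in the usage comment are illustrative only.
def _example_expand_targets(targets, clusters):
    return ['module.{}_{}'.format(target, cluster)
            for cluster in clusters
            for target in targets]

# _example_expand_targets(['kinesis'], ['prod', 'corp'])
# -> ['module.kinesis_prod', 'module.kinesis_corp']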
Beispiel #26
0
def generate_monitoring(cluster_name, cluster_dict, config):
    """Add the CloudWatch Monitoring module to the Terraform cluster dict.

    Example configuration:

    "cloudwatch_monitoring": {
      "enabled": true,
      "kinesis_alarms_enabled": true,
      "lambda_alarms_enabled": true,
      "settings": {
        "lambda_invocation_error_period": "600",
        "kinesis_iterator_age_error_period": "600",
        "kinesis_write_throughput_exceeded_threshold": "100"
      }
    }

    Args:
        cluster_name (str): The name of the currently generating cluster
        cluster_dict (defaultdict): The dict containing all Terraform config for a given cluster.
        config (dict): The loaded config from the 'conf/' directory

    Returns:
        bool: Result of applying the cloudwatch_monitoring module
    """
    prefix = config['global']['account']['prefix']
    infrastructure_config = config['global'].get('infrastructure')
    monitoring_config = config['clusters'][cluster_name]['modules']['cloudwatch_monitoring']
    sns_topic_arn = None

    if not (infrastructure_config and 'monitoring' in infrastructure_config):
        LOGGER_CLI.error('Invalid config: Make sure you declare global infrastructure options!')
        return False

    if not monitoring_config.get('enabled', False):
        LOGGER_CLI.info('CloudWatch Monitoring not enabled, skipping...')
        return True

    if infrastructure_config['monitoring'].get('create_sns_topic'):
        topic_name = 'stream_alert_monitoring'

    elif infrastructure_config['monitoring'].get('sns_topic_name'):
        topic_name = infrastructure_config['monitoring']['sns_topic_name']

    else:
        LOGGER_CLI.error(
            'Invalid config: set either "create_sns_topic" or "sns_topic_name" '
            'in the global monitoring settings')
        return False

    sns_topic_arn = 'arn:aws:sns:{region}:{account_id}:{topic}'.format(
        region=config['global']['account']['region'],
        account_id=config['global']['account']['aws_account_id'],
        topic=topic_name)

    cluster_dict['module']['cloudwatch_monitoring_{}'.format(cluster_name)] = {
        'source': 'modules/tf_stream_alert_monitoring',
        'sns_topic_arn': sns_topic_arn,
        'kinesis_alarms_enabled': False,
        'lambda_alarms_enabled': False
    }

    if monitoring_config.get('lambda_alarms_enabled', True):
        cluster_dict['module']['cloudwatch_monitoring_{}'.format(cluster_name)].update({
            'lambda_functions': [
                '{}_{}_streamalert_rule_processor'.format(prefix, cluster_name),
                '{}_{}_streamalert_alert_processor'.format(prefix, cluster_name)
            ],
            'lambda_alarms_enabled': True
        })

    if monitoring_config.get('kinesis_alarms_enabled', True):
        cluster_dict['module']['cloudwatch_monitoring_{}'.format(cluster_name)].update({
            'kinesis_stream': '{}_{}_stream_alert_kinesis'.format(prefix, cluster_name),
            'kinesis_alarms_enabled': True
        })

    # Add support for custom settings for tweaking alarm thresholds, eval periods, and periods
    # Note: This does not strictly check for proper variable names, since there are so many.
    #       Instead, Terraform will error out if an improper name is used.
    #       Also, every value in these settings should be a string, so cast for safety.
    for setting_name, setting_value in monitoring_config.get('settings', {}).iteritems():
        cluster_dict['module']['cloudwatch_monitoring_{}'.format(
            cluster_name)][setting_name] = str(setting_value)

    return True
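
# A minimal sketch of the Terraform module block generate_monitoring produces
# for a single cluster with both alarm types enabled; the prefix ('acme'),
# cluster name ('prod'), region, and account ID are invented for illustration.
EXAMPLE_MONITORING_MODULE = {
    'module': {
        'cloudwatch_monitoring_prod': {
            'source': 'modules/tf_stream_alert_monitoring',
            'sns_topic_arn': 'arn:aws:sns:us-east-1:123456789012:stream_alert_monitoring',
            'lambda_functions': [
                'acme_prod_streamalert_rule_processor',
                'acme_prod_streamalert_alert_processor'
            ],
            'lambda_alarms_enabled': True,
            'kinesis_stream': 'acme_prod_stream_alert_kinesis',
            'kinesis_alarms_enabled': True
        }
    }
}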
Beispiel #27
0
def create_table(athena_client, options, config):
    """Create a 'streamalert' Athena table

    Args:
        athena_client (boto3.client): Instantiated CLI AthenaClient
        options (namedtuple): The parsed args passed from the CLI
        config (CLIConfig): Loaded StreamAlert CLI config
    """
    sa_firehose = StreamAlertFirehose(
        config['global']['account']['region'],
        config['global']['infrastructure']['firehose'], config['logs'])

    if not options.bucket:
        LOGGER_CLI.error('Missing command line argument --bucket')
        return

    if not options.refresh_type:
        LOGGER_CLI.error('Missing command line argument --refresh_type')
        return

    # Initialize query so an unsupported --type value cannot leave it undefined
    query = None

    if options.type == 'data':
        if not options.table_name:
            LOGGER_CLI.error('Missing command line argument --table_name')
            return

        # Convert special characters in schema name to underscores
        sanitized_table_name = sa_firehose.firehose_log_name(
            options.table_name)

        # Check that the log type is enabled via Firehose
        if sanitized_table_name not in sa_firehose.enabled_logs:
            LOGGER_CLI.error(
                'Table name %s missing from configuration or '
                'is not enabled.', sanitized_table_name)
            return

        # Check if the table exists
        if athena_client.check_table_exists(sanitized_table_name):
            LOGGER_CLI.info('The \'%s\' table already exists.',
                            sanitized_table_name)
            return

        log_info = config['logs'][options.table_name.replace('_', ':', 1)]

        schema = dict(log_info['schema'])
        sanitized_schema = StreamAlertFirehose.sanitize_keys(schema)

        athena_schema = handler_helpers.to_athena_schema(sanitized_schema)

        # Add envelope keys to Athena Schema
        configuration_options = log_info.get('configuration')
        if configuration_options:
            envelope_keys = configuration_options.get('envelope_keys')
            if envelope_keys:
                sanitized_envelope_key_schema = StreamAlertFirehose.sanitize_keys(
                    envelope_keys)
                # Note: this key is wrapped in backticks to be Hive compliant
                athena_schema[
                    '`streamalert:envelope_keys`'] = handler_helpers.to_athena_schema(
                        sanitized_envelope_key_schema)

        # Handle Schema overrides
        #   This is useful when an Athena schema needs to differ from the normal log schema
        if options.schema_override:
            for override in options.schema_override:
                if '=' not in override:
                    LOGGER_CLI.error(
                        'Invalid schema override [%s], use column_name=type format',
                        override)
                    return

                column_name, column_type = override.split('=')
                if not all([column_name, column_type]):
                    LOGGER_CLI.error(
                        'Invalid schema override [%s], use column_name=type format',
                        override)
                    return

                # Columns are escaped to avoid Hive issues with special characters
                column_name = '`{}`'.format(column_name)
                if column_name in athena_schema:
                    athena_schema[column_name] = column_type
                    LOGGER_CLI.info('Applied schema override: %s:%s',
                                    column_name, column_type)
                else:
                    LOGGER_CLI.error(
                        'Schema override column %s not found in Athena Schema, skipping',
                        column_name)

        query = _construct_create_table_statement(
            schema=athena_schema,
            table_name=sanitized_table_name,
            bucket=options.bucket)

    elif options.type == 'alerts':
        if athena_client.check_table_exists(options.type):
            LOGGER_CLI.info('The \'alerts\' table already exists.')
            return
        query = ALERTS_TABLE_STATEMENT.format(bucket=options.bucket)

    if query:
        create_table_success, _ = athena_client.run_athena_query(
            query=query, database='streamalert')

        if create_table_success:
            # Update the CLI config
            config['lambda']['athena_partition_refresh_config'] \
                  ['refresh_type'][options.refresh_type][options.bucket] = options.type
            config.write()

            table_name = options.type if options.type == 'alerts' else sanitized_table_name
            LOGGER_CLI.info('The %s table was successfully created!',
                            table_name)
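
# A minimal sketch of the --schema_override handling above: each override is a
# 'column_name=type' pair that replaces the inferred Athena type for an
# existing column. The column and type names in the usage comment are examples.
def _example_apply_overrides(athena_schema, overrides):
    for override in overrides:
        column_name, _, column_type = override.partition('=')
        if not (column_name and column_type):
            continue  # malformed override, skip it
        # Columns are escaped with backticks to stay Hive compliant
        column_name = '`{}`'.format(column_name)
        if column_name in athena_schema:
            athena_schema[column_name] = column_type
    return athena_schema

# _example_apply_overrides({'`received`': 'string'}, ['received=timestamp'])
# -> {'`received`': 'timestamp'}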
Beispiel #28
0
def terraform_handler(options, config):
    """Handle all Terraform CLI operations

    Args:
        options (namedtuple): Parsed arguments from manage.py
        config (CLIConfig): Loaded StreamAlert CLI config
    """
    # Check for valid credentials
    if not check_credentials():
        return

    # Verify terraform is installed
    if not terraform_check():
        return
    # Use a named tuple to match the 'processor' and 'clusters' attributes in the argparse options
    deploy_opts = namedtuple('DeployOptions', ['processor', 'clusters'])

    # Plan and Apply our streamalert infrastructure
    if options.subcommand == 'build':
        terraform_build(options, config)

    # generate terraform files
    elif options.subcommand == 'generate':
        if not terraform_generate(config=config):
            return

    elif options.subcommand == 'init-backend':
        run_command(['terraform', 'init'])

    # initialize streamalert infrastructure from a blank state
    elif options.subcommand == 'init':
        LOGGER_CLI.info('Initializing StreamAlert')

        # generate init Terraform files
        if not terraform_generate(config=config, init=True):
            return

        LOGGER_CLI.info('Initializing Terraform')
        if not run_command(['terraform', 'init']):
            sys.exit(1)

        # build init infrastructure
        LOGGER_CLI.info('Building Initial Infrastructure')
        init_targets = [
            'aws_s3_bucket.lambda_source', 'aws_s3_bucket.logging_bucket',
            'aws_s3_bucket.stream_alert_secrets',
            'aws_s3_bucket.terraform_remote_state',
            'aws_s3_bucket.streamalerts', 'aws_kms_key.stream_alert_secrets',
            'aws_kms_alias.stream_alert_secrets'
        ]
        if not tf_runner(targets=init_targets):
        LOGGER_CLI.error('An error occurred while running StreamAlert init')
            sys.exit(1)

        # generate the main.tf with remote state enabled
        LOGGER_CLI.info('Configuring Terraform Remote State')
        if not terraform_generate(config=config):
            return

        if not run_command(['terraform', 'init']):
            return

        LOGGER_CLI.info('Deploying Lambda Functions')
        # deploy both lambda functions
        deploy(deploy_opts(['rule', 'alert'], []), config)
        # create all remainder infrastructure

        LOGGER_CLI.info('Building Remainder Infrastructure')
        tf_runner()

    elif options.subcommand == 'clean':
        if not continue_prompt(
                message='Are you sure you want to clean all Terraform files?'):
            sys.exit(1)
        terraform_clean(config)

    elif options.subcommand == 'destroy':
        if not continue_prompt(message='Are you sure you want to destroy?'):
            sys.exit(1)

        if options.target:
            targets = []
            # Iterate over any targets to destroy. Global modules, like athena
            # are prefixed with `stream_alert_` while cluster based modules
            # are a combination of the target and cluster name
            for target in options.target:
                if target == 'athena':
                    targets.append('module.stream_alert_{}'.format(target))
                elif target == 'threat_intel_downloader':
                    targets.append('module.threat_intel_downloader')
                else:
                    targets.extend([
                        'module.{}_{}'.format(target, cluster)
                        for cluster in config.clusters()
                    ])

            tf_runner(targets=targets, action='destroy')
            return

        # Migrate back to local state so Terraform can successfully
        # destroy the S3 bucket used by the backend.
        if not terraform_generate(config=config, init=True):
            return

        if not run_command(['terraform', 'init']):
            return

        # Destroy all of the infrastructure
        if not tf_runner(action='destroy'):
            return

        # Remove old Terraform files
        terraform_clean(config)

    # get a quick status on our declared infrastructure
    elif options.subcommand == 'status':
        terraform_status(config)
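
# A minimal sketch of the destroy-target resolution above: global modules such
# as athena map to a single 'stream_alert_*' (or standalone) module, while
# cluster-scoped modules are expanded per cluster. Cluster names are examples.
def _example_destroy_targets(requested, clusters):
    targets = []
    for target in requested:
        if target == 'athena':
            targets.append('module.stream_alert_{}'.format(target))
        elif target == 'threat_intel_downloader':
            targets.append('module.threat_intel_downloader')
        else:
            targets.extend('module.{}_{}'.format(target, cluster)
                           for cluster in clusters)
    return targets

# _example_destroy_targets(['athena', 'kinesis'], ['prod'])
# -> ['module.stream_alert_athena', 'module.kinesis_prod']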
Beispiel #29
0
def rebuild_partitions(athena_client, options, config):
    """Rebuild an Athena table's partitions

    Steps:
      - Get the list of current partitions
      - Destroy existing table
      - Re-create tables
      - Re-create partitions

    Args:
        athena_client (boto3.client): Instantiated CLI AthenaClient
        options (namedtuple): The parsed args passed from the CLI
        config (CLIConfig): Loaded StreamAlert CLI config
    """
    if not options.table_name:
        LOGGER_CLI.error('Missing command line argument --table_name')
        return

    if not options.bucket:
        LOGGER_CLI.error('Missing command line argument --bucket')
        return

    sa_firehose = StreamAlertFirehose(
        config['global']['account']['region'],
        config['global']['infrastructure']['firehose'], config['logs'])
    sanitized_table_name = sa_firehose.firehose_log_name(options.table_name)

    if options.type == 'data':
        # Get the current set of partitions
        partition_success, partitions = athena_client.run_athena_query(
            query='SHOW PARTITIONS {}'.format(sanitized_table_name),
            database='streamalert')
        if not partition_success:
            LOGGER_CLI.error('An error occurred when loading partitions for %s',
                             sanitized_table_name)
            return

        unique_partitions = athena_helpers.unique_values_from_query(partitions)

        # Drop the table
        LOGGER_CLI.info('Dropping table %s', sanitized_table_name)
        drop_success, _ = athena_client.run_athena_query(
            query='DROP TABLE {}'.format(sanitized_table_name),
            database='streamalert')
        if not drop_success:
            LOGGER_CLI.error('An error occurred when dropping the %s table',
                             sanitized_table_name)
            return

        LOGGER_CLI.info('Dropped table %s', sanitized_table_name)

        new_partitions_statement = athena_helpers.partition_statement(
            unique_partitions, options.bucket, sanitized_table_name)

        # Make sure our new alter table statement is within the query API limits
        if len(new_partitions_statement) > MAX_QUERY_LENGTH:
            LOGGER_CLI.error(
                'Partition statement too large, writing to local file')
            with open('partitions_{}.txt'.format(sanitized_table_name),
                      'w') as partition_file:
                partition_file.write(new_partitions_statement)
            return

        # Re-create the table with previous partitions
        options.refresh_type = 'add_hive_partition'
        create_table(athena_client, options, config)

        LOGGER_CLI.info('Creating %d new partitions for %s',
                        len(unique_partitions), sanitized_table_name)
        new_part_success, _ = athena_client.run_athena_query(
            query=new_partitions_statement, database='streamalert')
        if not new_part_success:
            LOGGER_CLI.error('Error re-creating new partitions for %s',
                             sanitized_table_name)
            return

        LOGGER_CLI.info('Successfully rebuilt partitions for %s',
                        sanitized_table_name)

    else:
        LOGGER_CLI.info('Refreshing the alerts table is not supported')
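
# A hedged sketch of the kind of statement athena_helpers.partition_statement
# is assumed to build from the recovered 'dt' partitions; the helper itself is
# not shown above, so treat this exact DDL layout and S3 path as assumptions.
def _example_partition_statement(partitions, bucket, table_name):
    parts = ' '.join(
        "PARTITION (dt = '{0}') LOCATION 's3://{1}/{2}/{0}/'".format(
            partition, bucket, table_name)
        for partition in sorted(partitions))
    return 'ALTER TABLE {} ADD IF NOT EXISTS {}'.format(table_name, parts)

# _example_partition_statement({'2018-01-01-00'}, 'example-bucket', 'cloudtrail')
# -> "ALTER TABLE cloudtrail ADD IF NOT EXISTS PARTITION (dt = '2018-01-01-00') ..."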
Beispiel #30
0
def create_table(table, bucket, config, schema_override=None):
    """Create a 'streamalert' Athena table

    Args:
        table (str): The name of the table being created
        bucket (str): The s3 bucket to be used as the location for Athena data
        config (CLIConfig): Loaded StreamAlert CLI config
        schema_override (set): An optional set of key=value pairs to be used for
            overriding the configured column_name=value_type.
    """
    enabled_logs = FirehoseClient.load_enabled_log_sources(
        config['global']['infrastructure']['firehose'],
        config['logs']
    )

    # Convert special characters in schema name to underscores
    sanitized_table_name = FirehoseClient.firehose_log_name(table)

    # Check that the log type is enabled via Firehose
    if sanitized_table_name != 'alerts' and sanitized_table_name not in enabled_logs:
        LOGGER_CLI.error('Table name %s missing from configuration or '
                         'is not enabled.', sanitized_table_name)
        return

    athena_client = get_athena_client(config)

    # Check if the table exists
    if athena_client.check_table_exists(sanitized_table_name):
        LOGGER_CLI.info('The \'%s\' table already exists.', sanitized_table_name)
        return

    if table == 'alerts':
        # get a fake alert so we can get the keys needed and their types
        alert = Alert('temp_rule_name', {}, {})
        output = alert.output_dict()
        schema = record_to_schema(output)
        athena_schema = helpers.logs_schema_to_athena_schema(schema)

        query = _construct_create_table_statement(
            schema=athena_schema, table_name=table, bucket=bucket)

    else:  # all other tables are log types

        log_info = config['logs'][table.replace('_', ':', 1)]

        schema = dict(log_info['schema'])
        sanitized_schema = FirehoseClient.sanitize_keys(schema)

        athena_schema = helpers.logs_schema_to_athena_schema(sanitized_schema)

        # Add envelope keys to Athena Schema
        configuration_options = log_info.get('configuration')
        if configuration_options:
            envelope_keys = configuration_options.get('envelope_keys')
            if envelope_keys:
                sanitized_envelope_key_schema = FirehoseClient.sanitize_keys(envelope_keys)
                # Note: this key is wrapped in backticks to be Hive compliant
                athena_schema['`streamalert:envelope_keys`'] = helpers.logs_schema_to_athena_schema(
                    sanitized_envelope_key_schema)

        # Handle Schema overrides
        #   This is useful when an Athena schema needs to differ from the normal log schema
        if schema_override:
            for override in schema_override:
                # Use partition so a missing '=' cannot raise an unpacking error
                column_name, _, column_type = override.partition('=')
                if not all([column_name, column_type]):
                    LOGGER_CLI.error('Invalid schema override [%s], use column_name=type format',
                                     override)
                    continue

                # Columns are escaped to avoid Hive issues with special characters
                column_name = '`{}`'.format(column_name)
                if column_name in athena_schema:
                    athena_schema[column_name] = column_type
                    LOGGER_CLI.info('Applied schema override: %s:%s', column_name, column_type)
                else:
                    LOGGER_CLI.error(
                        'Schema override column %s not found in Athena Schema, skipping',
                        column_name)

        query = _construct_create_table_statement(
            schema=athena_schema, table_name=sanitized_table_name, bucket=bucket)

    success = athena_client.run_query(query=query)
    if not success:
        LOGGER_CLI.error('The %s table could not be created', sanitized_table_name)
        return

    # Update the CLI config
    if (table != 'alerts' and
            bucket not in config['lambda']['athena_partition_refresh_config']['buckets']):
        config['lambda']['athena_partition_refresh_config']['buckets'][bucket] = 'data'
        config.write()

    LOGGER_CLI.info('The %s table was successfully created!', sanitized_table_name)
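
# A hedged usage sketch for the refactored create_table above, shown as
# comments only: the bucket names are made up, 'config' is assumed to be an
# already-loaded CLIConfig, and the log name is just an example. The alerts
# table derives its schema from a temporary Alert object, while log tables
# pull theirs from the 'logs' section of the loaded config.
#
#   config = CLIConfig()  # assumed constructor that loads the conf/ directory
#   create_table('alerts', 'acme.streamalerts', config)
#   create_table('cloudwatch_events', 'acme.streamalert.data', config,
#                schema_override={'detail=string'})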