def set_prefix(self, prefix):
    """Set the Org Prefix in Global settings"""
    if not isinstance(prefix, (unicode, str)):
        LOGGER_CLI.error('Invalid prefix type, must be string')
        return

    if '_' in prefix:
        LOGGER_CLI.error('Prefix cannot contain underscores')
        return

    self.config['global']['account']['prefix'] = prefix
    self.config['global']['terraform']['tfstate_bucket'] = self.config[
        'global']['terraform']['tfstate_bucket'].replace(
            'PREFIX_GOES_HERE', prefix)

    self.config['lambda']['alert_processor_config'][
        'source_bucket'] = self.config['lambda']['alert_processor_config'][
            'source_bucket'].replace('PREFIX_GOES_HERE', prefix)
    self.config['lambda']['rule_processor_config'][
        'source_bucket'] = self.config['lambda']['rule_processor_config'][
            'source_bucket'].replace('PREFIX_GOES_HERE', prefix)

    if self.config['lambda'].get('stream_alert_apps_config'):
        self.config['lambda']['stream_alert_apps_config'][
            'source_bucket'] = self.config['lambda'][
                'stream_alert_apps_config']['source_bucket'].replace(
                    'PREFIX_GOES_HERE', prefix)

    if self.config['lambda'].get('threat_intel_downloader_config'):
        self.config['lambda']['threat_intel_downloader_config']['source_bucket'] = \
            self.config['lambda'][
                'threat_intel_downloader_config']['source_bucket'].replace(
                    'PREFIX_GOES_HERE', prefix)

    self.write()

    LOGGER_CLI.info('Prefix successfully configured')
def terraform_generate(**kwargs):
    """Generate all Terraform plans for the configured clusters.

    Keyword Args:
        config [dict]: The loaded config from the 'conf/' directory
        init [bool]: Indicates if main.tf is generated for `terraform init`
    """
    config = kwargs.get('config')
    init = kwargs.get('init', False)

    # Setup main
    LOGGER_CLI.info('Generating cluster file: main.tf')
    main_json = json.dumps(
        generate_main(init=init, config=config), indent=2, sort_keys=True)
    with open('terraform/main.tf', 'w') as tf_file:
        tf_file.write(main_json)

    # Break out early during the init process, clusters aren't needed yet
    if init:
        return True

    # Setup clusters
    for cluster in config.clusters():
        if cluster == 'main':
            raise InvalidClusterName(
                'Rename cluster "main" to something else!')

        LOGGER_CLI.info('Generating cluster file: %s.tf', cluster)
        cluster_json = json.dumps(
            generate_cluster(cluster_name=cluster, config=config),
            indent=2,
            sort_keys=True)
        with open('terraform/{}.tf'.format(cluster), 'w') as tf_file:
            tf_file.write(cluster_json)

    return True
def tf_runner(action='apply', refresh=True, auto_approve=False, targets=None):
    """Terraform wrapper to build StreamAlert infrastructure.

    Resolves modules with `terraform get` before continuing.

    Args:
        action (str): Terraform action ('apply' or 'destroy').
        refresh (bool): If True, Terraform will refresh its state before applying the change.
        auto_approve (bool): If True, Terraform will *not* prompt the user for approval.
        targets (list): Optional list of affected targets.
            If not specified, Terraform will run against all of its resources.

    Returns:
        bool: True if the terraform command was successful
    """
    LOGGER_CLI.debug('Resolving Terraform modules')
    if not run_command(['terraform', 'get'], quiet=True):
        return False

    tf_command = [
        'terraform', action, '-var-file=../conf/lambda.json',
        '-refresh={}'.format(str(refresh).lower())
    ]

    if action == 'destroy':
        # Terraform destroy has a '-force' flag instead of '-auto-approve'
        LOGGER_CLI.info('Destroying infrastructure')
        tf_command.append('-force={}'.format(str(auto_approve).lower()))
    else:
        LOGGER_CLI.info('%s changes', 'Applying' if auto_approve else 'Planning')
        tf_command.append('-auto-approve={}'.format(str(auto_approve).lower()))

    if targets:
        tf_command.extend('-target={}'.format(x) for x in targets)

    return run_command(tf_command)
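# A minimal usage sketch for tf_runner above; the module targets are
# illustrative, not taken from a real deployment.
def example_tf_runner_usage():
    # Plan and apply only the kinesis module for a hypothetical 'prod' cluster
    tf_runner(targets=['module.kinesis_prod'])

    # Tear everything down without an interactive approval prompt
    tf_runner(action='destroy', auto_approve=True)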
def set_prefix(self, prefix):
    """Set the Org Prefix in Global settings"""
    if not isinstance(prefix, (unicode, str)):
        LOGGER_CLI.error('Invalid prefix type, must be string')
        return

    if '_' in prefix:
        LOGGER_CLI.error('Prefix cannot contain underscores')
        return

    tf_state_bucket = '{}.streamalert.terraform.state'.format(prefix)
    self.config['global']['account']['prefix'] = prefix
    self.config['global']['account']['kms_key_alias'] = '{}_streamalert_secrets'.format(prefix)
    self.config['global']['terraform']['tfstate_bucket'] = tf_state_bucket
    self.config['lambda']['athena_partition_refresh_config']['buckets'].clear()
    self.config['lambda']['athena_partition_refresh_config']['buckets'] \
        ['{}.streamalerts'.format(prefix)] = 'alerts'

    lambda_funcs = [
        'alert_merger',
        'alert_processor',
        'athena_partition_refresh',
        'rule_processor',
        'stream_alert_apps',
        'threat_intel_downloader'
    ]

    # Update all function configurations with the streamalert source bucket info
    source_bucket = '{}.streamalert.source'.format(prefix)
    for func in lambda_funcs:
        func_config = '{}_config'.format(func)
        if func_config in self.config['lambda']:
            self.config['lambda'][func_config]['source_bucket'] = source_bucket

    self.write()

    LOGGER_CLI.info('Prefix successfully configured')
def tf_runner(**kwargs):
    """Terraform wrapper to build StreamAlert infrastructure.

    Steps:
        - resolve modules with `terraform get`
        - run `terraform plan` for the given targets
        - if plan is successful and user confirms prompt,
          then the infrastructure is applied

    kwargs:
        targets: a list of Terraform targets
        action: 'apply' or 'destroy'

    Returns:
        bool: True if the terraform command was successful
    """
    targets = kwargs.get('targets', [])
    action = kwargs.get('action', None)
    tf_action_index = 1  # The index to the terraform 'action'

    var_files = {'conf/lambda.json'}
    tf_opts = ['-var-file=../{}'.format(x) for x in var_files]
    tf_targets = ['-target={}'.format(x) for x in targets]
    tf_command = ['terraform', 'plan'] + tf_opts + tf_targets
    if action == 'destroy':
        tf_command.append('-destroy')

    LOGGER_CLI.debug('Resolving Terraform modules')
    if not run_command(['terraform', 'get'], quiet=True):
        return False

    LOGGER_CLI.info('Planning infrastructure')
    if not run_command(tf_command):
        return False

    if not continue_prompt():
        sys.exit(1)

    if action == 'destroy':
        LOGGER_CLI.info('Destroying infrastructure')
        tf_command[tf_action_index] = action
        tf_command.remove('-destroy')
        tf_command.append('-force')

    elif action:
        tf_command[tf_action_index] = action

    else:
        LOGGER_CLI.info('Creating infrastructure')
        tf_command[tf_action_index] = 'apply'
        tf_command.append('-refresh=false')

    if not run_command(tf_command):
        return False

    return True
def _resolve_third_party(self, temp_package_path):
    """Install all third-party packages into the deployment package folder

    Args:
        temp_package_path (str): Full path to temp package path

    Returns:
        bool: False if the pip command failed to install requirements, True otherwise
    """
    third_party_libs = self.config['lambda'][self.config_key]['third_party_libraries']

    # Return a default of True here if no libraries to install
    if not third_party_libs:
        LOGGER_CLI.info('No third-party libraries to install.')
        return True

    LOGGER_CLI.info(
        'Installing third-party libraries: %s', ', '.join(third_party_libs))
    pip_command = ['pip', 'install']
    pip_command.extend(third_party_libs)
    pip_command.extend(['--upgrade', '--target', temp_package_path])

    # Return True if the pip command is successfully run
    return run_command(pip_command, cwd=temp_package_path, quiet=True)
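# A hedged illustration of the config shape _resolve_third_party reads from;
# the function key and library names below are examples, not a real conf/lambda.json.
example_lambda_config = {
    'lambda': {
        'rule_processor_config': {
            'third_party_libraries': ['jsonpath_rw', 'netaddr']
        }
    }
}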
def _resolve_third_party(self, temp_package_path):
    """Install all third-party packages into the deployment package folder

    Args:
        temp_package_path [string]: Full path to temp package path

    Returns:
        [boolean] False if the pip command failed to install requirements,
            True otherwise
    """
    third_party_libs = self.config['lambda'][
        self.config_key]['third_party_libraries']
    if third_party_libs:
        LOGGER_CLI.info('Installing third-party libraries: %s',
                        ', '.join(third_party_libs))
        pip_command = ['install']
        pip_command.extend(third_party_libs)
        pip_command.extend(['--upgrade', '--target', temp_package_path])
        # Return True if the pip result code is 0
        return pip.main(pip_command) == 0
    else:
        LOGGER_CLI.info('No third-party libraries to install.')
        # Return a default of True here if pip is not called
        return True
def generate_flow_logs(cluster_name, cluster_dict, config):
    """Add the VPC Flow Logs module to the Terraform cluster dict.

    Args:
        cluster_name [string]: The name of the currently generating cluster
        cluster_dict [defaultdict]: The dict containing all Terraform config
            for a given cluster.
        config [dict]: The loaded config from the 'conf/' directory

    Returns:
        [bool] Result of applying the flow_logs module
    """
    modules = config['clusters'][cluster_name]['modules']
    flow_log_group_name_default = '{}_{}_streamalert_flow_logs'.format(
        config['global']['account']['prefix'], cluster_name)
    flow_log_group_name = modules['flow_logs'].get(
        'log_group_name', flow_log_group_name_default)

    if modules['flow_logs']['enabled']:
        cluster_dict['module']['flow_logs_{}'.format(cluster_name)] = {
            'source': 'modules/tf_stream_alert_flow_logs',
            'destination_stream_arn': '${{module.kinesis_{}.arn}}'.format(cluster_name),
            'flow_log_group_name': flow_log_group_name
        }
        for flow_log_input in ('vpcs', 'subnets', 'enis'):
            input_data = modules['flow_logs'].get(flow_log_input)
            if input_data:
                cluster_dict['module']['flow_logs_{}'.format(
                    cluster_name)][flow_log_input] = input_data
        return True
    else:
        LOGGER_CLI.info('Flow logs disabled, nothing to do')
        return False
def _terraform_clean(config):
    """Remove leftover Terraform statefiles and main/cluster files

    Args:
        config (CLIConfig): Loaded StreamAlert CLI
    """
    LOGGER_CLI.info('Cleaning Terraform files')

    cleanup_files = [
        '{}.tf.json'.format(cluster) for cluster in config.clusters()
    ]
    cleanup_files.extend([
        'athena.tf.json', 'main.tf.json', 'terraform.tfstate',
        'terraform.tfstate.backup'
    ])
    for tf_file in cleanup_files:
        file_to_remove = 'terraform/{}'.format(tf_file)
        if not os.path.isfile(file_to_remove):
            continue
        os.remove(file_to_remove)

    # Finally, delete the Terraform directory
    if os.path.isdir('terraform/.terraform/'):
        shutil.rmtree('terraform/.terraform/')
def create_database(athena_client):
    """Create the 'streamalert' Athena database

    Args:
        athena_client (boto3.client): Instantiated CLI AthenaClient
    """
    if athena_client.check_database_exists():
        LOGGER_CLI.info(
            'The \'streamalert\' database already exists, nothing to do')
        return

    create_db_success, create_db_result = athena_client.run_athena_query(
        query='CREATE DATABASE streamalert')

    if create_db_success and create_db_result['ResultSet'].get('Rows'):
        LOGGER_CLI.info('streamalert database successfully created!')
        LOGGER_CLI.info('results: %s', create_db_result['ResultSet']['Rows'])
def add_app_integration(self, app_info):
    """Add a configuration for a new streamalert app integration function

    Args:
        app_info (dict): The necessary values needed to begin configuring
            a new app integration
    """
    exists, prompt_for_auth, overwrite = False, True, False
    app = StreamAlertApp.get_app(app_info, False)

    # Check to see if there is an existing configuration for this app integration
    cluster_config = self.config['clusters'][app_info['cluster']]
    if app_info['app_name'] in cluster_config['modules'].get(
            'stream_alert_apps', {}):
        prompt = (
            'An app with the name \'{}\' is already configured for cluster '
            '\'{}\'. Would you like to update the existing app\'s configuration'
            '?'.format(app_info['app_name'], app_info['cluster']))

        exists = True

        # Return if the user is not deliberately updating an existing config
        if not continue_prompt(message=prompt):
            return

        prompt = (
            'Would you also like to update the authentication information for '
            'app integration with name \'{}\'?'.format(app_info['app_name']))

        # If this is true, we shouldn't prompt again to warn about overwriting
        prompt_for_auth = overwrite = continue_prompt(message=prompt)

    if prompt_for_auth and not save_app_auth_info(app, app_info, overwrite):
        return

    apps_config = cluster_config['modules'].get('stream_alert_apps', {})
    local_config_keys = {'interval', 'timeout', 'memory'}
    if not exists:
        # Save a default log level as info to the config
        app_info['log_level'] = 'info'
        app_info['current_version'] = '$LATEST'
        local_config_keys.update({'log_level', 'current_version', 'type'})

        apps_config[app_info['app_name']] = {
            key: app_info[key]
            for key in local_config_keys
        }
    else:
        apps_config[app_info['app_name']].update(
            {key: app_info[key] for key in local_config_keys})

    cluster_config['modules']['stream_alert_apps'] = apps_config

    # Add this service to the sources for this app integration
    # The `stream_alert_app` is purposely singular here
    app_sources = self.config['sources'].get('stream_alert_app', {})
    app_sources[app_info['function_name']] = {'logs': [app.service()]}
    self.config['sources']['stream_alert_app'] = app_sources

    LOGGER_CLI.info(
        'Successfully added \'%s\' app integration to \'conf/clusters/%s.json\' '
        'for service \'%s\'.', app_info['app_name'], app_info['cluster'],
        app_info['type'])

    self.write()
def athena_handler(options):
    """Handle Athena operations"""
    athena_client = StreamAlertAthenaClient(
        CONFIG, results_key_prefix='stream_alert_cli')

    if options.subcommand == 'init':
        CONFIG.generate_athena()

    elif options.subcommand == 'enable':
        CONFIG.set_athena_lambda_enable()

    elif options.subcommand == 'create-db':
        if athena_client.check_database_exists():
            LOGGER_CLI.info(
                'The \'streamalert\' database already exists, nothing to do')
            return

        create_db_success, create_db_result = athena_client.run_athena_query(
            query='CREATE DATABASE streamalert')

        if create_db_success and create_db_result['ResultSet'].get('Rows'):
            LOGGER_CLI.info('streamalert database successfully created!')
            LOGGER_CLI.info('results: %s',
                            create_db_result['ResultSet']['Rows'])

    elif options.subcommand == 'create-table':
        if not options.bucket:
            LOGGER_CLI.error('Missing command line argument --bucket')
            return

        if not options.refresh_type:
            LOGGER_CLI.error('Missing command line argument --refresh_type')
            return

        if options.type == 'data':
            if not options.table_name:
                LOGGER_CLI.error('Missing command line argument --table_name')
                return

            if options.table_name not in enabled_firehose_logs(CONFIG):
                LOGGER_CLI.error(
                    'Table name %s missing from configuration or '
                    'is not enabled.', options.table_name)
                return

            if athena_client.check_table_exists(options.table_name):
                LOGGER_CLI.info('The \'%s\' table already exists.',
                                options.table_name)
                return

            log_info = CONFIG['logs'][options.table_name.replace('_', ':', 1)]
            schema = dict(log_info['schema'])
            schema_statement = ''

            sanitized_schema = StreamAlert.sanitize_keys(schema)

            athena_schema = {}
            schema_type_mapping = {
                'string': 'string',
                'integer': 'int',
                'boolean': 'boolean',
                'float': 'decimal',
                dict: 'map<string, string>',
                list: 'array<string>'
            }

            def add_to_athena_schema(schema, root_key=''):
                """Helper function to add sanitized schemas to the Athena table schema"""
                # Setup the root_key dict
                if root_key and not athena_schema.get(root_key):
                    athena_schema[root_key] = {}

                for key_name, key_type in schema.iteritems():
                    # When using special characters in the beginning or end
                    # of a column name, they have to be wrapped in backticks
                    key_name = '`{}`'.format(key_name)

                    special_key = None
                    # Transform the {} or [] into hashable types
                    if key_type == {}:
                        special_key = dict
                    elif key_type == []:
                        special_key = list
                    # Cast nested dict as a string for now
                    # TODO(jacknagz): support recursive schemas
                    elif isinstance(key_type, dict):
                        special_key = 'string'

                    # Account for envelope keys
                    if root_key:
                        if special_key is not None:
                            athena_schema[root_key][
                                key_name] = schema_type_mapping[special_key]
                        else:
                            athena_schema[root_key][
                                key_name] = schema_type_mapping[key_type]
                    else:
                        if special_key is not None:
                            athena_schema[key_name] = schema_type_mapping[
                                special_key]
                        else:
                            athena_schema[key_name] = schema_type_mapping[
                                key_type]

            add_to_athena_schema(sanitized_schema)

            # Support envelope keys
            configuration_options = log_info.get('configuration')
            if configuration_options:
                envelope_keys = configuration_options.get('envelope_keys')
                if envelope_keys:
                    sanitized_envelope_keys = StreamAlert.sanitize_keys(
                        envelope_keys)
                    # Note: this key is wrapped in backticks to be Hive compliant
                    add_to_athena_schema(sanitized_envelope_keys,
                                         '`streamalert:envelope_keys`')

            for key_name, key_type in athena_schema.iteritems():
                # Account for nested structs
                if isinstance(key_type, dict):
                    struct_schema = ''.join([
                        '{0}:{1},'.format(sub_key, sub_type)
                        for sub_key, sub_type in key_type.iteritems()
                    ])
                    nested_schema_statement = '{0} struct<{1}>, '.format(
                        key_name,
                        # Use the minus index to remove the last comma
                        struct_schema[:-1])
                    schema_statement += nested_schema_statement
                else:
                    schema_statement += '{0} {1},'.format(key_name, key_type)

            query = (
                'CREATE EXTERNAL TABLE {table_name} ({schema}) '
                'PARTITIONED BY (dt string) '
                'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\' '
                'LOCATION \'s3://{bucket}/{table_name}/\''.format(
                    table_name=options.table_name,
                    # Use the minus index to remove the last comma
                    schema=schema_statement[:-1],
                    bucket=options.bucket))

        elif options.type == 'alerts':
            if athena_client.check_table_exists(options.type):
                LOGGER_CLI.info('The \'alerts\' table already exists.')
                return

            query = ('CREATE EXTERNAL TABLE alerts ('
                     'log_source string,'
                     'log_type string,'
                     'outputs array<string>,'
                     'record string,'
                     'rule_description string,'
                     'rule_name string,'
                     'source_entity string,'
                     'source_service string)'
                     'PARTITIONED BY (dt string)'
                     'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\''
                     'LOCATION \'s3://{bucket}/alerts/\''.format(
                         bucket=options.bucket))

        if query:
            create_table_success, _ = athena_client.run_athena_query(
                query=query, database='streamalert')

            if create_table_success:
                CONFIG['lambda']['athena_partition_refresh_config'] \
                      ['refresh_type'][options.refresh_type][options.bucket] = options.type
                CONFIG.write()
                table_name = options.type if options.type == 'alerts' else options.table_name
                LOGGER_CLI.info('The %s table was successfully created!',
                                table_name)
def athena_handler(options):
    """Handle Athena operations"""
    athena_client = StreamAlertAthenaClient(
        CONFIG, results_key_prefix='stream_alert_cli')

    if options.subcommand == 'init':
        CONFIG.generate_athena()

    elif options.subcommand == 'enable':
        CONFIG.set_athena_lambda_enable()

    elif options.subcommand == 'create-db':
        if athena_client.check_database_exists():
            LOGGER_CLI.info(
                'The \'streamalert\' database already exists, nothing to do')
            return

        create_db_success, create_db_result = athena_client.run_athena_query(
            query='CREATE DATABASE streamalert')

        if create_db_success and create_db_result['ResultSet'].get('Rows'):
            LOGGER_CLI.info('streamalert database successfully created!')
            LOGGER_CLI.info('results: %s',
                            create_db_result['ResultSet']['Rows'])

    elif options.subcommand == 'create-table':
        if not options.bucket:
            LOGGER_CLI.error('Missing command line argument --bucket')
            return

        if not options.refresh_type:
            LOGGER_CLI.error('Missing command line argument --refresh_type')
            return

        if options.type == 'data':
            if not options.table_name:
                LOGGER_CLI.error('Missing command line argument --table_name')
                return

            if options.table_name not in enabled_firehose_logs(CONFIG):
                LOGGER_CLI.error(
                    'Table name %s missing from configuration or '
                    'is not enabled.', options.table_name)
                return

            if athena_client.check_table_exists(options.table_name):
                LOGGER_CLI.info('The \'%s\' table already exists.',
                                options.table_name)
                return

            schema = CONFIG['logs'][options.table_name.replace('_', ':')]['schema']
            sanitized_schema = StreamAlert.sanitize_keys(schema)

            athena_schema = {}
            schema_type_mapping = {
                'string': 'string',
                'integer': 'int',
                'boolean': 'boolean',
                'float': 'decimal',
                dict: 'map<string, string>',
                list: 'array<string>'
            }

            for key_name, key_type in sanitized_schema.iteritems():
                # Transform the {} or [] into hashable types
                if key_type == {}:
                    key_type = dict
                elif key_type == []:
                    key_type = list

                athena_schema[key_name] = schema_type_mapping[key_type]

            schema_statement = ''.join([
                '{0} {1},'.format(key_name, key_type)
                for key_name, key_type in athena_schema.iteritems()
            ])[:-1]

            query = ('CREATE EXTERNAL TABLE {table_name} ({schema})'
                     'PARTITIONED BY (dt string)'
                     'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\''
                     'LOCATION \'s3://{bucket}/{table_name}/\''.format(
                         table_name=options.table_name,
                         schema=schema_statement,
                         bucket=options.bucket))

        elif options.type == 'alerts':
            if athena_client.check_table_exists(options.type):
                LOGGER_CLI.info('The \'alerts\' table already exists.')
                return

            query = ('CREATE EXTERNAL TABLE alerts ('
                     'log_source string,'
                     'log_type string,'
                     'outputs array<string>,'
                     'record string,'
                     'rule_description string,'
                     'rule_name string,'
                     'source_entity string,'
                     'source_service string)'
                     'PARTITIONED BY (dt string)'
                     'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\''
                     'LOCATION \'s3://{bucket}/alerts/\''.format(
                         bucket=options.bucket))

        if query:
            create_table_success, _ = athena_client.run_athena_query(
                query=query, database='streamalert')

            if create_table_success:
                CONFIG['lambda']['athena_partition_refresh_config'] \
                      ['refresh_type'][options.refresh_type][options.bucket] = options.type
                CONFIG.write()
                LOGGER_CLI.info('The %s table was successfully created!',
                                options.type)
def _publish_helper(self, **kwargs):
    """Handle clustered or single Lambda function publishing

    Keyword Arguments:
        cluster (str): The cluster to deploy to, this is optional

    Returns:
        bool: Result of the function publishes
    """
    cluster = kwargs.get('cluster')

    # Clustered Lambda functions have a different naming pattern
    if cluster:
        region = self.config['clusters'][cluster]['region']
        function_name = '{}_{}_streamalert_{}'.format(
            self.config['global']['account']['prefix'], cluster,
            self.package.package_name)
    else:
        region = self.config['global']['account']['region']
        function_name = '{}_streamalert_{}'.format(
            self.config['global']['account']['prefix'],
            self.package.package_name)

    # Configure the Lambda client
    client = boto3.client('lambda', region_name=region)
    code_sha_256 = self.config['lambda'][
        self.package.config_key]['source_current_hash']

    # Publish the function(s)
    # TODO: move the extra logic into the LambdaPackage subclasses instead of this
    if self.package.package_name == 'stream_alert_app':
        if 'stream_alert_apps' not in self.config['clusters'][cluster]['modules']:
            return True  # nothing to publish for this cluster

        for app_name, app_info in self.config['clusters'][cluster]['modules'] \
                ['stream_alert_apps'].iteritems():
            # Name follows format: '<prefix>_<cluster>_<service>_<app_name>_app'
            function_name = '_'.join([
                self.config['global']['account']['prefix'], cluster,
                app_info['type'], app_name, 'app'
            ])

            new_version = self._publish(client, function_name, code_sha_256)
            if not new_version:
                continue

            LOGGER_CLI.info('Published version %s for %s:%s', new_version,
                            cluster, function_name)

            app_info['current_version'] = new_version
    else:
        new_version = self._publish(client, function_name, code_sha_256)
        if not new_version:
            return False

        # Update the config
        if cluster:
            LOGGER_CLI.info('Published version %s for %s:%s', new_version,
                            cluster, function_name)
            self.config['clusters'][cluster]['modules']['stream_alert'] \
                [self.package.package_name]['current_version'] = new_version
        else:
            LOGGER_CLI.info('Published version %s for %s', new_version,
                            function_name)
            self.config['lambda'][
                self.package.config_key]['current_version'] = new_version

    self.config.write()

    return True
def add_metric_alarm(self, alarm_info):
    """Add a metric alarm that corresponds to a predefined metric

    Args:
        alarm_info (dict): All the necessary values needed to add a CloudWatch
            metric alarm
    """
    # Check to see if an alarm with this name already exists
    if self._alarm_exists(alarm_info['alarm_name']):
        return

    # Get the current metrics for each function
    current_metrics = metrics.MetricLogger.get_available_metrics()

    # Extract the function name this metric is associated with
    metric_function = {metric: function
                       for function in current_metrics
                       for metric in current_metrics[function]}[alarm_info['metric_name']]

    # Do not continue if the user is trying to apply a metric alarm for an athena
    # metric to a specific cluster (since the athena function operates on all clusters)
    if (alarm_info['metric_target'] != 'aggregate'
            and metric_function == metrics.ATHENA_PARTITION_REFRESH_NAME):
        LOGGER_CLI.error('Metrics for the athena function can only be applied '
                         'to an aggregate metric target, not on a per-cluster basis.')
        return

    # If the metric is related to either the rule processor or alert processor, we should
    # check to see if any cluster has metrics enabled for that function before continuing
    if (metric_function in {metrics.ALERT_PROCESSOR_NAME, metrics.RULE_PROCESSOR_NAME} and
            not any(self.config['clusters'][cluster]['modules']['stream_alert']
                    [metric_function].get('enable_metrics')
                    for cluster in self.config['clusters'])):
        prompt = ('Metrics are not currently enabled for the \'{}\' function '
                  'within any cluster. Creating an alarm will have no effect '
                  'until metrics are enabled for this function in at least one '
                  'cluster. Would you still like to continue?'.format(metric_function))
        if not continue_prompt(message=prompt):
            return

    elif metric_function == metrics.ATHENA_PARTITION_REFRESH_NAME:
        # If the user is attempting to add a metric for athena, make sure the athena
        # function is initialized first
        if 'athena_partition_refresh_config' not in self.config['lambda']:
            LOGGER_CLI.error('No configuration found for Athena Partition Refresh. '
                             'Please run: `$ python manage.py athena init` first.')
            return

        # If the athena function is initialized, but metrics are not enabled, ask
        # the user if they would like to enable them now
        if not self.config['lambda']['athena_partition_refresh_config'].get('enable_metrics'):
            prompt = ('Metrics are not currently enabled for the \'athena\' function. '
                      'Would you like to enable metrics for athena?')

            if continue_prompt(message=prompt):
                self.toggle_metrics(True, None, [metric_function])

            elif not continue_prompt(message='Would you still like to add this alarm '
                                             'even though metrics are disabled?'):
                return

    # Add metric alarms for the aggregate metrics - these are added to the global config
    if (alarm_info['metric_target'] == 'aggregate'
            or metric_function == metrics.ATHENA_PARTITION_REFRESH_NAME):
        global_config = self.config['global']['infrastructure']['monitoring']

        metric_alarms = global_config.get('metric_alarms', {})
        if not metric_alarms:
            global_config['metric_alarms'] = {}

        metric_alarms = global_config['metric_alarms'].get(metric_function, {})
        if not metric_alarms:
            global_config['metric_alarms'][metric_function] = {}

        # Format the metric name for the aggregate metric
        alarm_settings = alarm_info.copy()
        alarm_settings['metric_name'] = '{}-{}'.format(metrics.FUNC_PREFIXES[metric_function],
                                                       alarm_info['metric_name'])

        new_alarms = self._add_metric_alarm_config(alarm_settings, metric_alarms)
        if new_alarms is not False:
            global_config['metric_alarms'][metric_function] = new_alarms
            LOGGER_CLI.info('Successfully added \'%s\' metric alarm to '
                            '\'conf/global.json\'.', alarm_settings['alarm_name'])

    else:
        # Add metric alarms on a per-cluster basis - these are added to the cluster config
        self._add_metric_alarm_per_cluster(alarm_info, metric_function)

    # Save all of the alarm updates to disk
    self.write()
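# A hedged sketch of the alarm_info dict that add_metric_alarm consumes. Only
# 'alarm_name', 'metric_name', and 'metric_target' are referenced directly in
# the function above; the remaining CloudWatch alarm fields are assumptions.
example_alarm_info = {
    'alarm_name': 'Aggregate Failed Parses',
    'metric_name': 'FailedParses',  # must exist in MetricLogger.get_available_metrics()
    'metric_target': 'aggregate',   # or a specific cluster name
    'comparison_operator': 'GreaterThanOrEqualToThreshold',  # assumed field
    'evaluation_periods': 1,  # assumed field
    'period': 300,            # assumed field
    'threshold': 1.0          # assumed field
}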
def add_app_integration(self, app_info):
    """Add a configuration for a new streamalert app integration function

    Args:
        app_info (dict): The necessary values needed to begin configuring
            a new app integration
    """
    exists, prompt_for_auth, overwrite = False, True, False
    app = StreamAlertApp.get_app(app_info, False)

    cluster_name = app_info['cluster']
    app_name = app_info['app_name']
    func_name = app_info['function_name']

    # Check to see if there is an existing configuration for this app integration
    cluster_config = self.config['clusters'][cluster_name]
    if func_name in cluster_config['modules'].get('stream_alert_apps', {}):
        prompt = (
            'An app with the name \'{}\' is already configured for cluster '
            '\'{}\'. Would you like to update the existing app\'s configuration'
            '?'.format(app_name, cluster_name))

        exists = True

        # Return if the user is not deliberately updating an existing config
        if not continue_prompt(message=prompt):
            return

        prompt = (
            'Would you also like to update the authentication information for '
            'app integration with name \'{}\'?'.format(app_name))

        # If this is true, we shouldn't prompt again to warn about overwriting
        prompt_for_auth = overwrite = continue_prompt(message=prompt)

    if prompt_for_auth and not save_app_auth_info(app, app_info, overwrite):
        return

    apps_config = cluster_config['modules'].get('stream_alert_apps', {})
    if not exists:
        # Save default app settings to the config for new apps
        new_app_config = {
            'app_name': app_info['app_name'],
            'concurrency_limit': 2,
            'log_level': 'info',
            'log_retention_days': 14,
            'memory': app_info['memory'],
            'metric_alarms': {
                'errors': {
                    'enabled': True,
                    'evaluation_periods': 1,
                    'period_secs': 120
                }
            },
            'schedule_expression': app_info['schedule_expression'],
            'timeout': app_info['timeout'],
            'type': app_info['type']
        }
        apps_config[func_name] = new_app_config
    else:
        # Allow for updating certain attributes for the app without overwriting
        # current parts of the configuration
        updated_app_config = {
            'memory': app_info['memory'],
            'schedule_expression': app_info['schedule_expression'],
            'timeout': app_info['timeout']
        }
        apps_config[func_name].update(updated_app_config)

    cluster_config['modules']['stream_alert_apps'] = apps_config

    # Add this service to the sources for this app integration
    # The `stream_alert_app` is purposely singular here
    app_sources = self.config['sources'].get('stream_alert_app', {})
    app_sources[app_info['function_name']] = {'logs': [app.service()]}
    self.config['sources']['stream_alert_app'] = app_sources

    LOGGER_CLI.info(
        'Successfully added \'%s\' app integration to \'conf/clusters/%s.json\' '
        'for service \'%s\'.', app_info['app_name'], app_info['cluster'],
        app_info['type'])

    self.write()
def format_lambda_test_record(test_record):
    """Create a properly formatted Kinesis, S3, or SNS record.

    Supports a dictionary or string based data record.  Reads in
    event templates from the tests/integration/templates folder.

    Args:
        test_record (dict): Test record metadata dict with the following structure:
            data - string or dict of the raw data
            description - a string describing the test that is being performed
            trigger - bool of if the record should produce an alert
            source - which stream/s3 bucket originated the data
            service - which aws service originated the data
            compress (optional) - if the payload needs to be gzip compressed or not

    Returns:
        dict: in the format of the specific service
    """
    service = test_record['service']
    source = test_record['source']
    compress = test_record.get('compress')

    data_type = type(test_record['data'])
    if data_type == dict:
        data = json.dumps(test_record['data'])
    elif data_type in (unicode, str):
        data = test_record['data']
    else:
        LOGGER_CLI.info('Invalid data type: %s', data_type)
        return

    # Get the template file for this particular service
    record_template = _get_record_template(service)
    if not record_template:
        return

    if service == 's3':
        # Set the S3 object key to a random value for testing
        test_record['key'] = ('{:032X}'.format(random.randrange(16**32)))
        record_template['s3']['object']['key'] = test_record['key']
        record_template['s3']['object']['size'] = len(data)
        record_template['s3']['bucket']['arn'] = 'arn:aws:s3:::{}'.format(source)
        record_template['s3']['bucket']['name'] = source

        # Create the mocked s3 object in the designated bucket with the random key
        put_mock_s3_object(source, test_record['key'], data, 'us-east-1')

    elif service == 'kinesis':
        if compress:
            kinesis_data = base64.b64encode(zlib.compress(data))
        else:
            kinesis_data = base64.b64encode(data)

        record_template['kinesis']['data'] = kinesis_data
        record_template['eventSourceARN'] = ('arn:aws:kinesis:us-east-1:111222333:'
                                             'stream/{}'.format(source))

    elif service == 'sns':
        record_template['Sns']['Message'] = data
        record_template['EventSubscriptionArn'] = (
            'arn:aws:sns:us-east-1:111222333:{}'.format(source))

    elif service == 'stream_alert_app':
        record_template['stream_alert_app'] = source
        record_template['logs'] = [data]

    else:
        LOGGER_CLI.info('Invalid service %s', service)

    return record_template
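# A minimal sketch of a test record accepted by format_lambda_test_record above.
# The field set mirrors the docstring; the payload and stream name are invented
# for illustration.
def example_format_test_record():
    test_record = {
        'data': {'username': 'octocat', 'action': 'login'},  # raw record payload
        'description': 'Example login event that should trigger an alert',
        'trigger': True,
        'source': 'prefix_cluster1_stream_alert_kinesis',  # hypothetical stream name
        'service': 'kinesis',
        'compress': False
    }
    return format_lambda_test_record(test_record)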
def main():
    """Entry point for the CLI."""
    parser = build_parser()
    options = parser.parse_args()
    cli_runner(options)
    LOGGER_CLI.info('Completed')
def generate_cloudtrail(cluster_name, cluster_dict, config):
    """Add the CloudTrail module to the Terraform cluster dict.

    Args:
        cluster_name (str): The name of the currently generating cluster
        cluster_dict (defaultdict): The dict containing all Terraform config
            for a given cluster.
        config (dict): The loaded config from the 'conf/' directory

    Returns:
        bool: Result of applying the cloudtrail module
    """
    modules = config['clusters'][cluster_name]['modules']
    cloudtrail_module = 'cloudtrail_{}'.format(cluster_name)

    enabled_legacy = modules['cloudtrail'].get('enabled')

    cloudtrail_enabled = modules['cloudtrail'].get('enable_logging', True)
    kinesis_enabled = modules['cloudtrail'].get('enable_kinesis', True)
    send_to_cloudwatch = modules['cloudtrail'].get('send_to_cloudwatch', False)
    exclude_home_region = modules['cloudtrail'].get('exclude_home_region_events', False)

    account_ids = list(
        set([config['global']['account']['aws_account_id']] + modules['cloudtrail'].get(
            'cross_account_ids', [])))

    # Allow for backwards compatibility
    if enabled_legacy:
        del config['clusters'][cluster_name]['modules']['cloudtrail']['enabled']
        config['clusters'][cluster_name]['modules']['cloudtrail']['enable_logging'] = True
        config['clusters'][cluster_name]['modules']['cloudtrail']['enable_kinesis'] = True
        LOGGER_CLI.info('Converting legacy CloudTrail config')
        config.write()
        kinesis_enabled = True
        cloudtrail_enabled = True

    existing_trail = modules['cloudtrail'].get('existing_trail', False)
    is_global_trail = modules['cloudtrail'].get('is_global_trail', True)
    region = config['global']['account']['region']

    event_pattern_default = {'account': [config['global']['account']['aws_account_id']]}
    event_pattern = modules['cloudtrail'].get('event_pattern', event_pattern_default)

    # From here: http://amzn.to/2zF7CS0
    valid_event_pattern_keys = {
        'version', 'id', 'detail-type', 'source', 'account', 'time', 'region',
        'resources', 'detail'
    }
    if not set(event_pattern.keys()).issubset(valid_event_pattern_keys):
        LOGGER_CLI.error('Config Error: Invalid CloudWatch Event Pattern!')
        return False

    module_info = {
        'source': 'modules/tf_stream_alert_cloudtrail',
        'account_ids': account_ids,
        'cluster': cluster_name,
        'prefix': config['global']['account']['prefix'],
        'enable_logging': cloudtrail_enabled,
        'enable_kinesis': kinesis_enabled,
        's3_logging_bucket': config['global']['s3_access_logging']['logging_bucket'],
        'existing_trail': existing_trail,
        'send_to_cloudwatch': send_to_cloudwatch,
        'exclude_home_region_events': exclude_home_region,
        'region': region,
        'is_global_trail': is_global_trail
    }

    # Use the kinesis output from the kinesis streams module
    if kinesis_enabled:
        module_info['kinesis_arn'] = '${{module.kinesis_{}.arn}}'.format(cluster_name)
        module_info['event_pattern'] = json.dumps(event_pattern)

    if send_to_cloudwatch:
        destination_arn = modules['cloudtrail'].get(
            'cloudwatch_destination_arn',
            '${{module.cloudwatch_{}_{}.cloudwatch_destination_arn}}'.format(
                cluster_name, region))
        module_info['cloudwatch_destination_arn'] = destination_arn

    cluster_dict['module'][cloudtrail_module] = module_info

    return True
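# A hedged illustration of the per-cluster 'cloudtrail' module config that
# generate_cloudtrail reads. Every key below is referenced in the function,
# but the account IDs and values are invented for this example.
example_cloudtrail_config = {
    'cloudtrail': {
        'enable_logging': True,
        'enable_kinesis': True,
        'send_to_cloudwatch': False,
        'exclude_home_region_events': False,
        'cross_account_ids': ['123456789012'],
        'existing_trail': False,
        'is_global_trail': True,
        'event_pattern': {
            'account': ['123456789012'],
            'detail-type': ['AWS API Call via CloudTrail']
        }
    }
}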
def main():
    parser = build_parser()
    options = parser.parse_args()
    cli_runner(options)
    LOGGER_CLI.info('Completed')
def _terraform_init(config):
    """Initialize infrastructure using Terraform

    Args:
        config (CLIConfig): Loaded StreamAlert CLI
    """
    LOGGER_CLI.info('Initializing StreamAlert')

    # generate init Terraform files
    if not terraform_generate(config=config, init=True):
        return

    LOGGER_CLI.info('Initializing Terraform')
    if not run_command(['terraform', 'init']):
        sys.exit(1)

    # build init infrastructure
    LOGGER_CLI.info('Building Initial Infrastructure')
    init_targets = [
        'aws_s3_bucket.lambda_source', 'aws_s3_bucket.logging_bucket',
        'aws_s3_bucket.stream_alert_secrets',
        'aws_s3_bucket.terraform_remote_state',
        'aws_s3_bucket.streamalerts',
        'aws_kms_key.stream_alert_secrets',
        'aws_kms_alias.stream_alert_secrets'
    ]
    if not tf_runner(targets=init_targets):
        LOGGER_CLI.error('An error occurred while running StreamAlert init')
        sys.exit(1)

    # generate the main.tf with remote state enabled
    LOGGER_CLI.info('Configuring Terraform Remote State')
    if not terraform_generate(config=config):
        return

    if not run_command(['terraform', 'init']):
        return

    # Use a named tuple to match the 'processor' attribute in the argparse options
    deploy_opts = namedtuple('DeployOptions', ['processor', 'clusters'])

    LOGGER_CLI.info('Deploying Lambda Functions')

    deploy(deploy_opts(['rule', 'alert', 'alert_merger', 'athena'], []), config)

    # we need to manually create the streamalerts table since terraform does not support this
    # See: https://github.com/terraform-providers/terraform-provider-aws/issues/1486
    alerts_bucket = '{}.streamalerts'.format(config['global']['account']['prefix'])
    create_table('alerts', alerts_bucket, config)

    LOGGER_CLI.info('Building Remainder Infrastructure')
    tf_runner(refresh=False)
def rebuild_partitions(table, bucket, config):
    """Rebuild an Athena table's partitions

    Steps:
      - Get the list of current partitions
      - Destroy existing table
      - Re-create tables
      - Re-create partitions

    Args:
        table (str): The name of the table being rebuilt
        bucket (str): The s3 bucket to be used as the location for Athena data
        config (CLIConfig): Loaded StreamAlert CLI
    """
    athena_client = StreamAlertAthenaClient(
        config, results_key_prefix='stream_alert_cli')

    sa_firehose = StreamAlertFirehose(config['global']['account']['region'],
                                      config['global']['infrastructure']['firehose'],
                                      config['logs'])

    sanitized_table_name = sa_firehose.firehose_log_name(table)

    # Get the current set of partitions
    partition_success, partitions = athena_client.run_athena_query(
        query='SHOW PARTITIONS {}'.format(sanitized_table_name),
        database=athena_client.sa_database)
    if not partition_success:
        LOGGER_CLI.error('An error occurred when loading partitions for %s',
                         sanitized_table_name)
        return

    unique_partitions = athena_helpers.unique_values_from_query(partitions)

    if not unique_partitions:
        LOGGER_CLI.info('No partitions to rebuild for %s, nothing to do',
                        sanitized_table_name)
        return

    # Drop the table
    LOGGER_CLI.info('Dropping table %s', sanitized_table_name)
    drop_success, _ = athena_client.run_athena_query(
        query='DROP TABLE {}'.format(sanitized_table_name),
        database=athena_client.sa_database)
    if not drop_success:
        LOGGER_CLI.error('An error occurred when dropping the %s table',
                         sanitized_table_name)
        return

    LOGGER_CLI.info('Dropped table %s', sanitized_table_name)

    LOGGER_CLI.info('Creating table %s', sanitized_table_name)

    # Re-create the table with previous partitions
    create_table(table, bucket, config)

    new_partitions_statement = athena_helpers.partition_statement(
        unique_partitions, bucket, sanitized_table_name)

    # Make sure our new alter table statement is within the query API limits
    if len(new_partitions_statement) > MAX_QUERY_LENGTH:
        LOGGER_CLI.error('Partition statement too large, writing to local file')
        with open('partitions_{}.txt'.format(sanitized_table_name), 'w') as partition_file:
            partition_file.write(new_partitions_statement)
        return

    LOGGER_CLI.info('Creating %d new partitions for %s',
                    len(unique_partitions), sanitized_table_name)
    new_part_success, _ = athena_client.run_athena_query(
        query=new_partitions_statement, database=athena_client.sa_database)
    if not new_part_success:
        LOGGER_CLI.error('Error re-creating new partitions for %s',
                         sanitized_table_name)
        return

    LOGGER_CLI.info('Successfully rebuilt partitions for %s', sanitized_table_name)
def terraform_generate(config, init=False):
    """Generate all Terraform plans for the configured clusters.

    Args:
        config (dict): The loaded config from the 'conf/' directory
        init (bool): Indicates if main.tf.json is generated for `terraform init`

    Returns:
        bool: Result of cluster generating
    """
    cleanup_old_tf_files(config)

    # Setup the main.tf.json file
    LOGGER_CLI.debug('Generating cluster file: main.tf.json')
    with open('terraform/main.tf.json', 'w') as tf_file:
        json.dump(
            generate_main(init=init, config=config),
            tf_file,
            indent=2,
            sort_keys=True
        )

    # Return early during the init process, clusters are not needed yet
    if init:
        return True

    # Setup cluster files
    for cluster in config.clusters():
        if cluster in RESTRICTED_CLUSTER_NAMES:
            raise InvalidClusterName(
                'Rename cluster "main" or "athena" to something else!')

        LOGGER_CLI.debug('Generating cluster file: %s.tf.json', cluster)
        cluster_dict = generate_cluster(cluster_name=cluster, config=config)
        if not cluster_dict:
            LOGGER_CLI.error(
                'An error was generated while creating the %s cluster', cluster)
            return False

        with open('terraform/{}.tf.json'.format(cluster), 'w') as tf_file:
            json.dump(
                cluster_dict,
                tf_file,
                indent=2,
                sort_keys=True
            )

    # Setup Athena if it is enabled
    athena_config = config['lambda'].get('athena_partition_refresh_config')
    if athena_config:
        athena_file = 'terraform/athena.tf.json'
        if athena_config['enabled']:
            athena_generated_config = generate_athena(config=config)
            if athena_generated_config:
                with open(athena_file, 'w') as tf_file:
                    json.dump(
                        athena_generated_config,
                        tf_file,
                        indent=2,
                        sort_keys=True
                    )
        # Remove Athena file if it's disabled
        else:
            if os.path.isfile(athena_file):
                LOGGER_CLI.info('Removing old Athena Terraform file')
                os.remove(athena_file)

    return True
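# A small usage sketch of the init-then-build flow that calls terraform_generate
# twice, mirroring the 'init' subcommand handlers elsewhere in this section.
def example_generate_flow(config):
    # First pass: a minimal main.tf.json so `terraform init` can run
    if not terraform_generate(config=config, init=True):
        return

    # Second pass: full main/cluster/athena files with remote state enabled
    terraform_generate(config=config)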
def create_table(athena_client, options, config):
    """Create a 'streamalert' Athena table

    Args:
        athena_client (boto3.client): Instantiated CLI AthenaClient
        options (namedtuple): The parsed args passed from the CLI
        config (CLIConfig): Loaded StreamAlert CLI
    """
    if not options.bucket:
        LOGGER_CLI.error('Missing command line argument --bucket')
        return

    if not options.refresh_type:
        LOGGER_CLI.error('Missing command line argument --refresh_type')
        return

    if options.type == 'data':
        if not options.table_name:
            LOGGER_CLI.error('Missing command line argument --table_name')
            return

        if options.table_name not in terraform_cli_helpers.enabled_firehose_logs(config):
            LOGGER_CLI.error(
                'Table name %s missing from configuration or '
                'is not enabled.', options.table_name)
            return

        if athena_client.check_table_exists(options.table_name):
            LOGGER_CLI.info('The \'%s\' table already exists.',
                            options.table_name)
            return

        log_info = config['logs'][options.table_name.replace('_', ':', 1)]
        schema = dict(log_info['schema'])
        schema_statement = ''

        sanitized_schema = StreamAlert.sanitize_keys(schema)

        athena_schema = {}

        _add_to_athena_schema(sanitized_schema, athena_schema)

        # Support envelope keys
        configuration_options = log_info.get('configuration')
        if configuration_options:
            envelope_keys = configuration_options.get('envelope_keys')
            if envelope_keys:
                sanitized_envelope_key_schema = StreamAlert.sanitize_keys(envelope_keys)
                # Note: this key is wrapped in backticks to be Hive compliant
                _add_to_athena_schema(sanitized_envelope_key_schema, athena_schema,
                                      '`streamalert:envelope_keys`')

        for key_name, key_type in athena_schema.iteritems():
            # Account for nested structs
            if isinstance(key_type, dict):
                struct_schema = ''.join([
                    '{0}:{1},'.format(sub_key, sub_type)
                    for sub_key, sub_type in key_type.iteritems()
                ])
                nested_schema_statement = '{0} struct<{1}>, '.format(
                    key_name,
                    # Use the minus index to remove the last comma
                    struct_schema[:-1])
                schema_statement += nested_schema_statement
            else:
                schema_statement += '{0} {1},'.format(key_name, key_type)

        query = (
            'CREATE EXTERNAL TABLE {table_name} ({schema}) '
            'PARTITIONED BY (dt string) '
            'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\' '
            'WITH SERDEPROPERTIES ( \'ignore.malformed.json\' = \'true\') '
            'LOCATION \'s3://{bucket}/{table_name}/\''.format(
                table_name=options.table_name,
                # Use the minus index to remove the last comma
                schema=schema_statement[:-1],
                bucket=options.bucket))

    elif options.type == 'alerts':
        if athena_client.check_table_exists(options.type):
            LOGGER_CLI.info('The \'alerts\' table already exists.')
            return

        query = ('CREATE EXTERNAL TABLE alerts ('
                 'log_source string,'
                 'log_type string,'
                 'outputs array<string>,'
                 'record string,'
                 'rule_description string,'
                 'rule_name string,'
                 'source_entity string,'
                 'source_service string)'
                 'PARTITIONED BY (dt string)'
                 'ROW FORMAT SERDE \'org.openx.data.jsonserde.JsonSerDe\''
                 'LOCATION \'s3://{bucket}/alerts/\''.format(
                     bucket=options.bucket))

    if query:
        create_table_success, _ = athena_client.run_athena_query(
            query=query, database='streamalert')

        if create_table_success:
            # Update the CLI config
            config['lambda']['athena_partition_refresh_config'] \
                  ['refresh_type'][options.refresh_type][options.bucket] = options.type
            config.write()

            table_name = options.type if options.type == 'alerts' else options.table_name
            LOGGER_CLI.info('The %s table was successfully created!', table_name)
def terraform_handler(options):
    """Handle all Terraform CLI operations"""
    # Verify terraform is installed
    if not terraform_check():
        return
    # Use a named tuple to match the 'processor' attribute in the argparse options
    deploy_opts = namedtuple('DeployOptions', ['processor'])

    # Plan and Apply our streamalert infrastructure
    if options.subcommand == 'build':
        # Generate Terraform files
        if not terraform_generate(config=CONFIG):
            return
        # Target is for terraforming a specific streamalert module.
        # This value is passed as a list
        if options.target:
            targets = [
                'module.{}_{}'.format(target, cluster)
                for cluster in CONFIG.clusters()
                for target in options.target
            ]
            tf_runner(targets=targets)
        else:
            tf_runner()

    # generate terraform files
    elif options.subcommand == 'generate':
        if not terraform_generate(config=CONFIG):
            return

    elif options.subcommand == 'init-backend':
        run_command(['terraform', 'init'])

    # initialize streamalert infrastructure from a blank state
    elif options.subcommand == 'init':
        LOGGER_CLI.info('Initializing StreamAlert')

        # generate init Terraform files
        if not terraform_generate(config=CONFIG, init=True):
            return

        LOGGER_CLI.info('Initializing Terraform')
        if not run_command(['terraform', 'init']):
            sys.exit(1)

        # build init infrastructure
        LOGGER_CLI.info('Building Initial Infrastructure')
        init_targets = [
            'aws_s3_bucket.lambda_source', 'aws_s3_bucket.logging_bucket',
            'aws_s3_bucket.stream_alert_secrets',
            'aws_s3_bucket.terraform_remote_state',
            'aws_s3_bucket.streamalerts',
            'aws_kms_key.stream_alert_secrets',
            'aws_kms_alias.stream_alert_secrets'
        ]
        if not tf_runner(targets=init_targets):
            LOGGER_CLI.error('An error occurred while running StreamAlert init')
            sys.exit(1)

        # generate the main.tf with remote state enabled
        LOGGER_CLI.info('Configuring Terraform Remote State')
        if not terraform_generate(config=CONFIG):
            return

        if not run_command(['terraform', 'init']):
            return

        LOGGER_CLI.info('Deploying Lambda Functions')
        # deploy both lambda functions
        deploy(deploy_opts('all'))
        # create all remainder infrastructure
        LOGGER_CLI.info('Building Remainder Infrastructure')
        tf_runner()

    elif options.subcommand == 'clean':
        terraform_clean()

    elif options.subcommand == 'destroy':
        if options.target:
            target = options.target
            targets = [
                'module.{}_{}'.format(target, cluster)
                for cluster in CONFIG.clusters()
            ]
            tf_runner(targets=targets, action='destroy')
            return

        # Migrate back to local state so Terraform can successfully
        # destroy the S3 bucket used by the backend.
        if not terraform_generate(config=CONFIG, init=True):
            return

        if not run_command(['terraform', 'init']):
            return

        # Destroy all of the infrastructure
        if not tf_runner(action='destroy'):
            return

        # Remove old Terraform files
        terraform_clean()

    # get a quick status on our declared infrastructure
    elif options.subcommand == 'status':
        status()
def generate_monitoring(cluster_name, cluster_dict, config):
    """Add the CloudWatch Monitoring module to the Terraform cluster dict.

    Example configuration:

    "cloudwatch_monitoring": {
      "enabled": true,
      "kinesis_alarms_enabled": true,
      "lambda_alarms_enabled": true,
      "settings": {
        "lambda_invocation_error_period": "600",
        "kinesis_iterator_age_error_period": "600",
        "kinesis_write_throughput_exceeded_threshold": "100"
      }
    }

    Args:
        cluster_name (str): The name of the currently generating cluster
        cluster_dict (defaultdict): The dict containing all Terraform config
            for a given cluster.
        config (dict): The loaded config from the 'conf/' directory

    Returns:
        bool: Result of applying the cloudwatch_monitoring module
    """
    prefix = config['global']['account']['prefix']
    infrastructure_config = config['global'].get('infrastructure')
    monitoring_config = config['clusters'][cluster_name]['modules']['cloudwatch_monitoring']
    sns_topic_arn = None

    if not (infrastructure_config and 'monitoring' in infrastructure_config):
        LOGGER_CLI.error('Invalid config: Make sure you declare global infrastructure options!')
        return False

    if not monitoring_config.get('enabled', False):
        LOGGER_CLI.info('CloudWatch Monitoring not enabled, skipping...')
        return True

    if infrastructure_config['monitoring'].get('create_sns_topic'):
        topic_name = 'stream_alert_monitoring'
    elif infrastructure_config['monitoring'].get('sns_topic_name'):
        topic_name = infrastructure_config['monitoring']['sns_topic_name']

    sns_topic_arn = 'arn:aws:sns:{region}:{account_id}:{topic}'.format(
        region=config['global']['account']['region'],
        account_id=config['global']['account']['aws_account_id'],
        topic=topic_name)

    cluster_dict['module']['cloudwatch_monitoring_{}'.format(cluster_name)] = {
        'source': 'modules/tf_stream_alert_monitoring',
        'sns_topic_arn': sns_topic_arn,
        'kinesis_alarms_enabled': False,
        'lambda_alarms_enabled': False
    }

    if monitoring_config.get('lambda_alarms_enabled', True):
        cluster_dict['module']['cloudwatch_monitoring_{}'.format(cluster_name)].update({
            'lambda_functions': [
                '{}_{}_streamalert_rule_processor'.format(prefix, cluster_name),
                '{}_{}_streamalert_alert_processor'.format(prefix, cluster_name)
            ],
            'lambda_alarms_enabled': True
        })

    if monitoring_config.get('kinesis_alarms_enabled', True):
        cluster_dict['module']['cloudwatch_monitoring_{}'.format(cluster_name)].update({
            'kinesis_stream': '{}_{}_stream_alert_kinesis'.format(prefix, cluster_name),
            'kinesis_alarms_enabled': True
        })

    # Add support for custom settings for tweaking alarm thresholds, eval periods, and periods
    # Note: This does not strictly check for proper variable names, since there are so many.
    #       Instead, Terraform will error out if an improper name is used.
    #       Also, every value in these settings should be a string, so cast for safety.
    for setting_name, setting_value in monitoring_config.get('settings', {}).iteritems():
        cluster_dict['module']['cloudwatch_monitoring_{}'.format(
            cluster_name)][setting_name] = str(setting_value)

    return True
def create_table(athena_client, options, config):
    """Create a 'streamalert' Athena table

    Args:
        athena_client (boto3.client): Instantiated CLI AthenaClient
        options (namedtuple): The parsed args passed from the CLI
        config (CLIConfig): Loaded StreamAlert CLI
    """
    sa_firehose = StreamAlertFirehose(config['global']['account']['region'],
                                      config['global']['infrastructure']['firehose'],
                                      config['logs'])

    if not options.bucket:
        LOGGER_CLI.error('Missing command line argument --bucket')
        return

    if not options.refresh_type:
        LOGGER_CLI.error('Missing command line argument --refresh_type')
        return

    if options.type == 'data':
        if not options.table_name:
            LOGGER_CLI.error('Missing command line argument --table_name')
            return

        # Convert special characters in schema name to underscores
        sanitized_table_name = sa_firehose.firehose_log_name(options.table_name)

        # Check that the log type is enabled via Firehose
        if sanitized_table_name not in sa_firehose.enabled_logs:
            LOGGER_CLI.error(
                'Table name %s missing from configuration or '
                'is not enabled.', sanitized_table_name)
            return

        # Check if the table exists
        if athena_client.check_table_exists(sanitized_table_name):
            LOGGER_CLI.info('The \'%s\' table already exists.', sanitized_table_name)
            return

        log_info = config['logs'][options.table_name.replace('_', ':', 1)]
        schema = dict(log_info['schema'])
        sanitized_schema = StreamAlertFirehose.sanitize_keys(schema)

        athena_schema = handler_helpers.to_athena_schema(sanitized_schema)

        # Add envelope keys to Athena Schema
        configuration_options = log_info.get('configuration')
        if configuration_options:
            envelope_keys = configuration_options.get('envelope_keys')
            if envelope_keys:
                sanitized_envelope_key_schema = StreamAlertFirehose.sanitize_keys(envelope_keys)
                # Note: this key is wrapped in backticks to be Hive compliant
                athena_schema['`streamalert:envelope_keys`'] = handler_helpers.to_athena_schema(
                    sanitized_envelope_key_schema)

        # Handle Schema overrides
        # This is useful when an Athena schema needs to differ from the normal log schema
        if options.schema_override:
            for override in options.schema_override:
                if '=' not in override:
                    LOGGER_CLI.error(
                        'Invalid schema override [%s], use column_name=type format',
                        override)
                    return

                column_name, column_type = override.split('=')
                if not all([column_name, column_type]):
                    LOGGER_CLI.error(
                        'Invalid schema override [%s], use column_name=type format',
                        override)

                # Columns are escaped to avoid Hive issues with special characters
                column_name = '`{}`'.format(column_name)
                if column_name in athena_schema:
                    athena_schema[column_name] = column_type
                    LOGGER_CLI.info('Applied schema override: %s:%s',
                                    column_name, column_type)
                else:
                    LOGGER_CLI.error(
                        'Schema override column %s not found in Athena Schema, skipping',
                        column_name)

        query = _construct_create_table_statement(
            schema=athena_schema, table_name=sanitized_table_name, bucket=options.bucket)

    elif options.type == 'alerts':
        if athena_client.check_table_exists(options.type):
            LOGGER_CLI.info('The \'alerts\' table already exists.')
            return

        query = ALERTS_TABLE_STATEMENT.format(bucket=options.bucket)

    if query:
        create_table_success, _ = athena_client.run_athena_query(
            query=query, database='streamalert')

        if create_table_success:
            # Update the CLI config
            config['lambda']['athena_partition_refresh_config'] \
                  ['refresh_type'][options.refresh_type][options.bucket] = options.type
            config.write()

            table_name = options.type if options.type == 'alerts' else sanitized_table_name
            LOGGER_CLI.info('The %s table was successfully created!', table_name)
def terraform_handler(options, config):
    """Handle all Terraform CLI operations

    Args:
        options (namedtuple): Parsed arguments from manage.py
        config (CLIConfig): Loaded StreamAlert CLI
    """
    # Check for valid credentials
    if not check_credentials():
        return

    # Verify terraform is installed
    if not terraform_check():
        return

    # Use a named tuple to match the 'processor' attribute in the argparse options
    deploy_opts = namedtuple('DeployOptions', ['processor', 'clusters'])

    # Plan and Apply our streamalert infrastructure
    if options.subcommand == 'build':
        terraform_build(options, config)

    # generate terraform files
    elif options.subcommand == 'generate':
        if not terraform_generate(config=config):
            return

    elif options.subcommand == 'init-backend':
        run_command(['terraform', 'init'])

    # initialize streamalert infrastructure from a blank state
    elif options.subcommand == 'init':
        LOGGER_CLI.info('Initializing StreamAlert')

        # generate init Terraform files
        if not terraform_generate(config=config, init=True):
            return

        LOGGER_CLI.info('Initializing Terraform')
        if not run_command(['terraform', 'init']):
            sys.exit(1)

        # build init infrastructure
        LOGGER_CLI.info('Building Initial Infrastructure')
        init_targets = [
            'aws_s3_bucket.lambda_source', 'aws_s3_bucket.logging_bucket',
            'aws_s3_bucket.stream_alert_secrets',
            'aws_s3_bucket.terraform_remote_state',
            'aws_s3_bucket.streamalerts',
            'aws_kms_key.stream_alert_secrets',
            'aws_kms_alias.stream_alert_secrets'
        ]
        if not tf_runner(targets=init_targets):
            LOGGER_CLI.error('An error occurred while running StreamAlert init')
            sys.exit(1)

        # generate the main.tf with remote state enabled
        LOGGER_CLI.info('Configuring Terraform Remote State')
        if not terraform_generate(config=config):
            return

        if not run_command(['terraform', 'init']):
            return

        LOGGER_CLI.info('Deploying Lambda Functions')
        # deploy both lambda functions
        deploy(deploy_opts(['rule', 'alert'], []), config)
        # create all remainder infrastructure
        LOGGER_CLI.info('Building Remainder Infrastructure')
        tf_runner()

    elif options.subcommand == 'clean':
        if not continue_prompt(
                message='Are you sure you want to clean all Terraform files?'):
            sys.exit(1)
        terraform_clean(config)

    elif options.subcommand == 'destroy':
        if not continue_prompt(message='Are you sure you want to destroy?'):
            sys.exit(1)

        if options.target:
            targets = []
            # Iterate over any targets to destroy. Global modules, like athena
            # are prefixed with `stream_alert_` while cluster based modules
            # are a combination of the target and cluster name
            for target in options.target:
                if target == 'athena':
                    targets.append('module.stream_alert_{}'.format(target))
                elif target == 'threat_intel_downloader':
                    targets.append('module.threat_intel_downloader')
                else:
                    targets.extend([
                        'module.{}_{}'.format(target, cluster)
                        for cluster in config.clusters()
                    ])

            tf_runner(targets=targets, action='destroy')
            return

        # Migrate back to local state so Terraform can successfully
        # destroy the S3 bucket used by the backend.
        if not terraform_generate(config=config, init=True):
            return

        if not run_command(['terraform', 'init']):
            return

        # Destroy all of the infrastructure
        if not tf_runner(action='destroy'):
            return

        # Remove old Terraform files
        terraform_clean(config)

    # get a quick status on our declared infrastructure
    elif options.subcommand == 'status':
        terraform_status(config)
def rebuild_partitions(athena_client, options, config):
    """Rebuild an Athena table's partitions

    Steps:
      - Get the list of current partitions
      - Destroy existing table
      - Re-create tables
      - Re-create partitions

    Args:
        athena_client (boto3.client): Instantiated CLI AthenaClient
        options (namedtuple): The parsed args passed from the CLI
        config (CLIConfig): Loaded StreamAlert CLI
    """
    if not options.table_name:
        LOGGER_CLI.error('Missing command line argument --table_name')
        return

    if not options.bucket:
        LOGGER_CLI.error('Missing command line argument --bucket')
        return

    sa_firehose = StreamAlertFirehose(config['global']['account']['region'],
                                      config['global']['infrastructure']['firehose'],
                                      config['logs'])
    sanitized_table_name = sa_firehose.firehose_log_name(options.table_name)

    if options.type == 'data':
        # Get the current set of partitions
        partition_success, partitions = athena_client.run_athena_query(
            query='SHOW PARTITIONS {}'.format(sanitized_table_name),
            database='streamalert')
        if not partition_success:
            LOGGER_CLI.error('An error occurred when loading partitions for %s',
                             sanitized_table_name)
            return

        unique_partitions = athena_helpers.unique_values_from_query(partitions)

        # Drop the table
        LOGGER_CLI.info('Dropping table %s', sanitized_table_name)
        drop_success, _ = athena_client.run_athena_query(
            query='DROP TABLE {}'.format(sanitized_table_name),
            database='streamalert')
        if not drop_success:
            LOGGER_CLI.error('An error occurred when dropping the %s table',
                             sanitized_table_name)
            return

        LOGGER_CLI.info('Dropped table %s', sanitized_table_name)

        new_partitions_statement = athena_helpers.partition_statement(
            unique_partitions, options.bucket, sanitized_table_name)

        # Make sure our new alter table statement is within the query API limits
        if len(new_partitions_statement) > MAX_QUERY_LENGTH:
            LOGGER_CLI.error('Partition statement too large, writing to local file')
            with open('partitions_{}.txt'.format(sanitized_table_name),
                      'w') as partition_file:
                partition_file.write(new_partitions_statement)
            return

        # Re-create the table with previous partitions
        options.refresh_type = 'add_hive_partition'
        create_table(athena_client, options, config)

        LOGGER_CLI.info('Creating %d new partitions for %s',
                        len(unique_partitions), sanitized_table_name)
        new_part_success, _ = athena_client.run_athena_query(
            query=new_partitions_statement, database='streamalert')
        if not new_part_success:
            LOGGER_CLI.error('Error re-creating new partitions for %s',
                             sanitized_table_name)
            return

        LOGGER_CLI.info('Successfully rebuilt partitions for %s', sanitized_table_name)

    else:
        LOGGER_CLI.info('Refreshing alerts tables unsupported')
def create_table(table, bucket, config, schema_override=None):
    """Create a 'streamalert' Athena table

    Args:
        table (str): The name of the table being rebuilt
        bucket (str): The s3 bucket to be used as the location for Athena data
        config (CLIConfig): Loaded StreamAlert CLI
        schema_override (set): An optional set of key=value pairs to be used for
            overriding the configured column_name=value_type.
    """
    enabled_logs = FirehoseClient.load_enabled_log_sources(
        config['global']['infrastructure']['firehose'],
        config['logs']
    )

    # Convert special characters in schema name to underscores
    sanitized_table_name = FirehoseClient.firehose_log_name(table)

    # Check that the log type is enabled via Firehose
    if sanitized_table_name != 'alerts' and sanitized_table_name not in enabled_logs:
        LOGGER_CLI.error('Table name %s missing from configuration or '
                         'is not enabled.', sanitized_table_name)
        return

    athena_client = get_athena_client(config)

    # Check if the table exists
    if athena_client.check_table_exists(sanitized_table_name):
        LOGGER_CLI.info('The \'%s\' table already exists.', sanitized_table_name)
        return

    if table == 'alerts':
        # get a fake alert so we can get the keys needed and their types
        alert = Alert('temp_rule_name', {}, {})
        output = alert.output_dict()
        schema = record_to_schema(output)
        athena_schema = helpers.logs_schema_to_athena_schema(schema)

        query = _construct_create_table_statement(
            schema=athena_schema, table_name=table, bucket=bucket)

    else:  # all other tables are log types
        log_info = config['logs'][table.replace('_', ':', 1)]

        schema = dict(log_info['schema'])
        sanitized_schema = FirehoseClient.sanitize_keys(schema)

        athena_schema = helpers.logs_schema_to_athena_schema(sanitized_schema)

        # Add envelope keys to Athena Schema
        configuration_options = log_info.get('configuration')
        if configuration_options:
            envelope_keys = configuration_options.get('envelope_keys')
            if envelope_keys:
                sanitized_envelope_key_schema = FirehoseClient.sanitize_keys(envelope_keys)
                # Note: this key is wrapped in backticks to be Hive compliant
                athena_schema['`streamalert:envelope_keys`'] = \
                    helpers.logs_schema_to_athena_schema(sanitized_envelope_key_schema)

        # Handle Schema overrides
        # This is useful when an Athena schema needs to differ from the normal log schema
        if schema_override:
            for override in schema_override:
                column_name, column_type = override.split('=')
                if not all([column_name, column_type]):
                    LOGGER_CLI.error(
                        'Invalid schema override [%s], use column_name=type format',
                        override)

                # Columns are escaped to avoid Hive issues with special characters
                column_name = '`{}`'.format(column_name)
                if column_name in athena_schema:
                    athena_schema[column_name] = column_type
                    LOGGER_CLI.info('Applied schema override: %s:%s',
                                    column_name, column_type)
                else:
                    LOGGER_CLI.error(
                        'Schema override column %s not found in Athena Schema, skipping',
                        column_name)

        query = _construct_create_table_statement(
            schema=athena_schema, table_name=sanitized_table_name, bucket=bucket)

    success = athena_client.run_query(query=query)
    if not success:
        LOGGER_CLI.error('The %s table could not be created', sanitized_table_name)
        return

    # Update the CLI config
    if (table != 'alerts' and
            bucket not in config['lambda']['athena_partition_refresh_config']['buckets']):
        config['lambda']['athena_partition_refresh_config']['buckets'][bucket] = 'data'
        config.write()

    LOGGER_CLI.info('The %s table was successfully created!', sanitized_table_name)
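# A hedged usage sketch for create_table above; the bucket names are
# hypothetical, and the schema override follows the column_name=type format
# that the function parses.
def example_create_tables(config):
    # The alerts table derives its schema from a placeholder Alert object
    create_table('alerts', 'acme.streamalerts', config)

    # A log table, with one column overridden to be stored as a plain string
    create_table('cloudwatch_events', 'acme.streamalert.data', config,
                 schema_override={'detail=string'})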