def get_configs(self):
    meta_configs, stanza_configs = self.get_modinput_configs()
    self.meta_configs = meta_configs
    self.stanza_configs = stanza_configs
    self.cred_manager = cred.CredentialManager(
        meta_configs["session_key"], meta_configs["server_uri"])
    self.app_dir = util.get_app_path(op.abspath(__file__))
    self.app = op.basename(self.app_dir)
    return meta_configs, stanza_configs
class AWSConfigRuleConf(object):
    app_dir = scutil.get_app_path(op.abspath(__file__))
    app_file = op.join(app_dir, "local", "app.conf")
    passwords = "passwords"
    passwords_file_w_path = op.join(app_dir, "local", passwords + ".conf")
    task_file = "aws_config_rule_tasks"
    task_file_w_path = op.join(app_dir, "local", task_file + ".conf")
    conf_file = "aws_config_rule"
    conf_file_w_path = op.join(app_dir, "local", conf_file + ".conf")

    def __init__(self):
        self.metas, self.stanza_configs = tacommon.get_modinput_configs()
        self.metas[tac.app_name] = tac.splunk_ta_aws

    def get_tasks(self):
        conf_mgr = cm.ConfManager(self.metas[tac.server_uri],
                                  self.metas[tac.session_key])
        tasks = self._get_config_rule_tasks(conf_mgr)
        settings = conf_mgr.all_stanzas_as_dicts(self.conf_file,
                                                 do_reload=False)
        proxy_info = tpc.get_proxy_info(self.metas[tac.session_key])
        # set proxy here for validating credentials
        tacommon.set_proxy_env(proxy_info)
        set_log_level(settings[tac.log_stanza][tac.log_level])

        valid_tasks = []
        for task in tasks:
            try:
                # validate credentials
                tacommon.get_service_client(task, tac.config)
                task[tac.log_level] = settings[tac.log_stanza][tac.log_level]
                task.update(settings[tac.global_settings])
                task.update(proxy_info)
                valid_tasks.append(task)
            except Exception:
                input_name = scutil.extract_datainput_name(task[tac.name])
                logger.exception(
                    'Failed to load credentials, ignoring this input.',
                    datainput=input_name)
        return tacommon.handle_hec(valid_tasks, "aws_config_rule")

    def _get_config_rule_tasks(self, conf_mgr):
        stanzas = conf_mgr.all_stanzas(self.task_file, do_reload=False)
        tasks = []
        for stanza in stanzas:
            if scutil.is_true(stanza.get(tac.disabled)):
                continue
            stanza[tac.server_uri] = self.metas[tac.server_uri]
            stanza[tac.session_key] = self.metas[tac.session_key]
            # Normalize tac.account to tac.aws_account
            stanza[tac.aws_account] = stanza.get(tac.account)
            tasks.extend(self._expand_tasks(stanza))
        return tasks

    def _expand_tasks(self, stanza):
        tasks = []
        regions = stanza[tac.region].split(",")
        rule_names = stanza.get(acc.rule_names, [])
        if rule_names:
            names = rule_names
            rule_names = []
            for rule in names.split(","):
                rule = rule.strip()
                if rule:
                    rule_names.append(rule)

        for region in regions:
            task = copy.copy(stanza)
            task[tac.region] = region.strip()
            task[tac.polling_interval] = int(stanza[tac.polling_interval])
            task[tac.is_secure] = True
            task[tac.datainput] = task[tac.stanza_name]
            task[acc.rule_names] = rule_names
            task.update(self.metas)
            tasks.append(task)
        return tasks
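# Sketch of the rule_names normalization in AWSConfigRuleConf._expand_tasks
# above: a comma-separated conf value becomes a stripped, non-empty list,
# and an empty value stays an empty list. The value below is illustrative,
# not taken from any real conf file.
raw_rule_names = "rule-a, rule-b, , rule-c"
normalized = [rule.strip() for rule in raw_rule_names.split(",") if rule.strip()]
assert normalized == ["rule-a", "rule-b", "rule-c"]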
class AWSDescribeConf(object):
    app_dir = scutil.get_app_path(op.abspath(__file__))
    app_file = op.join(app_dir, "local", "app.conf")
    conf_file = "aws_description"
    conf_file_w_path = op.join(app_dir, "local", conf_file + ".conf")
    task_file = "aws_description_tasks"
    task_file_w_path = op.join(app_dir, "local", task_file + ".conf")
    passwords = "passwords"
    passwords_file_w_path = op.join(app_dir, "local", passwords + ".conf")

    def __init__(self):
        self.metas, self.stanza_configs = tacommon.get_modinput_configs()
        self.metas[tac.app_name] = tac.splunk_ta_aws

    def get_tasks(self):
        conf_mgr = cm.ConfManager(self.metas[tac.server_uri],
                                  self.metas[tac.session_key])
        tasks = self._get_description_tasks(conf_mgr)
        logging_settings = conf_mgr.get_stanza(self.conf_file,
                                               tac.log_stanza,
                                               do_reload=False)
        # set logging level for our logger
        set_log_level(logging_settings[tac.log_level])
        proxy_info = tpc.get_proxy_info(self.metas[tac.session_key])
        # Set proxy for loading credentials by boto3
        tacommon.set_proxy_env(proxy_info)
        for task in tasks:
            task[tac.log_level] = logging_settings[tac.log_level]
            task.update(proxy_info)
        self._assign_source(tasks)
        return tasks

    def _get_description_tasks(self, conf_mgr):
        stanzas = conf_mgr.all_stanzas(self.task_file, do_reload=False)
        tasks, creds = [], {}
        for stanza in stanzas:
            if scutil.is_true(stanza.get(tac.disabled)):
                continue
            # Normalize tac.account to tac.aws_account
            stanza[tac.aws_account] = stanza.get(tac.account)
            tasks.extend(self._expand_tasks(stanza, creds))
        return tasks

    def _expand_tasks(self, stanza, creds):
        tasks = []
        for api_interval in stanza[adc.apis].split(","):
            api_interval = api_interval.split("/")
            api_name = api_interval[0].strip()
            api_interval = int(api_interval[1].strip())
            for region in stanza[tac.regions].split(","):
                region = region.strip()
                tasks.append({
                    tac.server_uri: self.metas[tac.server_uri],
                    tac.session_key: self.metas[tac.session_key],
                    tac.aws_account: stanza[tac.aws_account],
                    tac.aws_iam_role: stanza.get(tac.aws_iam_role),
                    tac.region: region,
                    adc.api: api_name,
                    tac.interval: api_interval,
                    tac.is_secure: True,
                    tac.index: stanza[tac.index],
                    tac.sourcetype: stanza[tac.sourcetype],
                    tac.datainput: stanza[tac.name],
                })
                if api_name in adc.global_resources:
                    break
        return tasks

    def _assign_source(self, tasks):
        for task in tasks:
            if not task.get(tac.source):
                task[tac.source] = "{region}:{api}".format(**task)
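# Sketch of the `apis` parsing in AWSDescribeConf._expand_tasks above: the
# conf value is a comma-separated list of "api_name/interval_seconds" pairs.
# The api names below are hypothetical examples, not a list of supported APIs.
apis_value = "ec2_instances/3600, ec2_volumes/7200"
parsed = []
for item in apis_value.split(","):
    api_name, api_interval = item.split("/")
    parsed.append((api_name.strip(), int(api_interval.strip())))
assert parsed == [("ec2_instances", 3600), ("ec2_volumes", 7200)]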
class AWSInspectorConf(object):
    app_dir = scutil.get_app_path(op.abspath(__file__))
    app_file = op.join(app_dir, "local", "app.conf")
    passwords = "passwords"
    passwords_file_w_path = op.join(app_dir, "local", passwords + ".conf")
    task_file = "aws_inspector_tasks"
    task_file_w_path = op.join(app_dir, "local", task_file + ".conf")
    conf_file = "aws_inspector"
    conf_file_w_path = op.join(app_dir, "local", conf_file + ".conf")

    def __init__(self):
        self.metas, self.stanza_configs = tacommon.get_modinput_configs()
        self.metas[tac.app_name] = tac.splunk_ta_aws

    def get_tasks(self):
        conf_mgr = cm.ConfManager(self.metas[tac.server_uri],
                                  self.metas[tac.session_key])
        stanzas = conf_mgr.all_stanzas(self.task_file, do_reload=False)
        settings = conf_mgr.all_stanzas_as_dicts(self.conf_file,
                                                 do_reload=False)
        proxy_info = tpc.get_proxy_info(self.metas[tac.session_key])
        # set proxy here for validating credentials
        tacommon.set_proxy_env(proxy_info)
        level = settings[tac.log_stanza][tac.log_level]
        set_log_level(level)
        tasks = self._get_inspector_tasks(stanzas, settings, proxy_info)

        config = dict()
        config.update(self.metas)
        config.update(settings[tac.global_settings])
        _cleanup_checkpoints(tasks, config)

        tasks = [
            task for task in tasks
            if not scutil.is_true(task.get('disabled'))
        ]
        return tacommon.handle_hec(tasks, "aws_inspector")

    def _get_inspector_tasks(self, stanzas, settings, proxy_info):
        tasks = []
        for stanza in stanzas:
            merged = dict(self.metas)
            merged[tac.log_level] = settings[tac.log_stanza][tac.log_level]
            merged.update(settings[tac.global_settings])
            merged.update(proxy_info)
            # Apply the stanza last so its 'disabled' field is not
            # overridden accidentally.
            merged.update(stanza)
            # Normalize tac.account to tac.aws_account
            merged[tac.aws_account] = merged.get(tac.account)
            tasks.extend(self._expand_tasks(merged))
        return tasks

    def _expand_tasks(self, stanza):
        tasks = []
        regions = stanza[tac.regions].split(",")
        for region in regions:
            task = copy.copy(stanza)
            task[tac.region] = region.strip()
            task[tac.polling_interval] = int(stanza[tac.polling_interval])
            task[tac.is_secure] = True
            task[tac.datainput] = task[tac.stanza_name]
            try:
                tacommon.get_service_client(task, tac.inspector)
                tasks.append(task)
            except Exception:
                input_name = scutil.extract_datainput_name(task[tac.name])
                logger.exception(
                    'Failed to load credentials, ignoring this input.',
                    datainput=input_name, region=region)
        return tasks
class AWSS3Conf(object):
    app_dir = scutil.get_app_path(op.abspath(__file__))
    app_file = op.join(app_dir, "local", "app.conf")
    task_file = "inputs"
    task_file_w_path = op.join(app_dir, "local", task_file + ".conf")
    passwords = "passwords"
    passwords_file_w_path = op.join(app_dir, "local", passwords + ".conf")
    log_info = "log_info"
    log_info_w_path = op.join(app_dir, "local", log_info + ".conf")

    def __init__(self):
        self.metas, self.stanza_configs = tacommon.get_modinput_configs()
        self.metas[tac.app_name] = tac.splunk_ta_aws

    def get_tasks(self):
        with logging.LogContext(phase="prepare"):
            return self._get_tasks()

    def _get_tasks(self):
        if not self.stanza_configs:
            return None

        conf_mgr = cm.ConfManager(self.metas[tac.server_uri],
                                  self.metas[tac.session_key])
        logging_settings = conf_mgr.get_stanza(self.log_info,
                                               asc.log_stanza,
                                               do_reload=False)
        # set the log level read from conf for our logger
        set_log_level(logging_settings[asc.log_level])

        # entry point for this stanza task, set up the root logger here.
        # Generic S3 can be configured to be single-instance or
        # multi-instance through an env variable.
        stanza_name = ''
        try:
            if len(self.stanza_configs) == 1:
                # only one stanza exists
                stanza_name = self.stanza_configs[0].get('name', '')
        except Exception:
            logger.exception('Failed to get stanza name!')

        stanza_name = extract_datainput_name(stanza_name)
        logging.setup_root_logger(app_name=tac.splunk_ta_aws,
                                  modular_name=asc.mod_name,
                                  stanza_name=stanza_name)

        proxy_info = tpc.get_proxy_info(self.metas[tac.session_key])
        tasks, creds = [], {}
        for stanza in self.stanza_configs:
            task = {}
            task.update(stanza)
            task.update(self.metas)
            task.update(proxy_info)
            task[tac.log_level] = logging_settings[asc.log_level]
            task[tac.interval] = tacommon.get_interval(task, 3600)
            task[tac.polling_interval] = task[tac.interval]
            task[asc.max_retries] = int(task.get(asc.max_retries, 3))
            task[asc.prefix] = task.get(asc.key_name)
            task[asc.last_modified] = self._get_last_modified_time(
                task[asc.initial_scan_datetime])
            task[asc.terminal_scan_datetime] = \
                self._convert_terminal_scan_datetime(
                    task.get(asc.terminal_scan_datetime))
            input_name = scutil.extract_datainput_name(task[tac.name])
            task[asc.data_input] = input_name
            task[tac.sourcetype] = task.get(tac.sourcetype, "aws:s3")
            task[asc.bucket_name] = str(task[asc.bucket_name])
            if not task.get(asc.whitelist):
                task[asc.whitelist] = s3common.sourcetype_to_keyname_regex.get(
                    task[tac.sourcetype])
            tasks.append(task)
        logger.info("Done with configuration read from conf.")
        s3ckpt.handle_ckpts(tasks)
        return tasks

    def _get_last_modified_time(self, scan_datetime):
        if not scan_datetime or scan_datetime.strip() == "default":
            stime = datetime.datetime.utcnow() + datetime.timedelta(days=-7)
        else:
            stime = tacommon.parse_datetime(self.metas[tac.server_uri],
                                            self.metas[tac.session_key],
                                            scan_datetime)
        return stime.strftime("%Y-%m-%dT%H:%M:%S.000Z")

    def _convert_terminal_scan_datetime(self, terminal_scan_datetime):
        if not terminal_scan_datetime:
            return 'z'
        else:
            stime = tacommon.parse_datetime(
                self.metas[tac.server_uri],
                self.metas[tac.session_key],
                terminal_scan_datetime,
            )
            return stime.strftime("%Y-%m-%dT%H:%M:%S.000Z")
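# Sketch of the default initial-scan window computed by
# AWSS3Conf._get_last_modified_time above: with no initial_scan_datetime
# (or "default"), scanning starts 7 days back, serialized in the ISO-8601
# form used for S3 LastModified comparisons.
import datetime

stime = datetime.datetime.utcnow() + datetime.timedelta(days=-7)
print(stime.strftime("%Y-%m-%dT%H:%M:%S.000Z"))  # e.g. 2024-05-01T12:00:00.000Z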
class AWSCloudWatchLogsConf(object):
    app_dir = scutil.get_app_path(op.abspath(__file__))
    app_file = op.join(app_dir, "local", "app.conf")
    conf_file = "aws_cloudwatch_logs"
    conf_file_w_path = op.join(app_dir, "local", conf_file + '.conf')
    task_file = "aws_cloudwatch_logs_tasks"
    task_file_w_path = op.join(app_dir, "local", task_file + '.conf')
    passwords = "passwords"
    passwords_file_w_path = op.join(app_dir, "local", passwords + ".conf")

    def __init__(self):
        self.metas, self.stanza_configs = tacommon.get_modinput_configs()
        self.metas[tac.app_name] = tac.splunk_ta_aws

    def get_tasks(self):
        conf_mgr = cm.ConfManager(self.metas[tac.server_uri],
                                  self.metas[tac.session_key])
        tasks = self._get_cloudwatch_logs_tasks(conf_mgr)
        logging_settings = conf_mgr.get_stanza(self.conf_file,
                                               tac.log_stanza,
                                               do_reload=False)
        set_log_level(logging_settings[tac.log_level])
        proxy_info = tpc.get_proxy_info(self.metas[tac.session_key])
        for task in tasks:
            task[tac.log_level] = logging_settings[tac.log_level]
            task.update(proxy_info)
        return tasks

    def _get_cloudwatch_logs_tasks(self, conf_mgr):
        stanzas = conf_mgr.all_stanzas(self.task_file, do_reload=False)
        tasks, creds = [], {}
        for stanza in stanzas:
            if scutil.is_true(stanza.get(tac.disabled)):
                continue
            # Normalize tac.account to tac.aws_account
            stanza[tac.aws_account] = stanza.get(tac.account)
            try:
                expanded = self._expand_tasks(stanza, creds)
            except Exception:
                logger.error("Failed to parse configuration, error=%s",
                             traceback.format_exc())
                continue
            tasks.extend(expanded)
        return tasks

    def _expand_tasks(self, stanza, creds):
        key_id, secret_key = tacommon.get_aws_creds(stanza, self.metas, creds)
        groups = stanza[aclc.groups].split(",")
        date_fmt = "%Y-%m-%dT%H:%M:%S"
        try:
            dt = datetime.strptime(stanza[aclc.only_after].strip(), date_fmt)
        except ValueError:
            logger.error("Invalid datetime=%s, expected format=%s",
                         stanza[aclc.only_after], date_fmt)
            return []

        only_after = scutil.datetime_to_seconds(dt) * 1000
        stream_matcher = re.compile(
            stanza.get(aclc.stream_matcher, "").strip() or ".*")

        tasks = []
        for log_group_name in groups:
            log_group_name = log_group_name.strip()
            tasks.append({
                aclc.lock: threading.Lock(),
                tac.region: stanza[tac.region],
                tac.interval: int(stanza[tac.interval].strip()),
                tac.key_id: key_id,
                tac.secret_key: secret_key,
                tac.is_secure: True,
                tac.index: stanza[tac.index],
                tac.sourcetype: stanza[tac.sourcetype],
                tac.checkpoint_dir: self.metas[tac.checkpoint_dir],
                tac.stanza_name: stanza[tac.stanza_name],
                aclc.log_group_name: log_group_name,
                aclc.stream_matcher: stream_matcher,
                aclc.only_after: only_after,
                aclc.delay: stanza[aclc.delay],
                tac.app_name: self.metas[tac.app_name],
            })
        return tasks
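# Sketch of the only_after computation in AWSCloudWatchLogsConf._expand_tasks
# above, assuming scutil.datetime_to_seconds() maps a naive UTC datetime to
# epoch seconds (an assumption; the real helper lives in the TA's utility
# library). CloudWatch Logs start times are expressed in milliseconds.
import calendar
from datetime import datetime

dt = datetime.strptime("2024-01-01T00:00:00", "%Y-%m-%dT%H:%M:%S")
only_after = calendar.timegm(dt.timetuple()) * 1000
assert only_after == 1704067200000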
class AWSCloudWatchConf(object):
    app_dir = scutil.get_app_path(op.abspath(__file__))
    app_file = op.join(app_dir, "local", "app.conf")
    task_file = "inputs"
    task_file_w_path = op.join(app_dir, "local", task_file + ".conf")
    passwords = "passwords"
    passwords_file_w_path = op.join(app_dir, "local", passwords + ".conf")
    conf_file = "aws_cloudwatch"
    conf_file_w_path = op.join(app_dir, "local", conf_file + ".conf")
    log_info = "log_info"
    log_info_w_path = op.join(app_dir, "local", log_info + ".conf")

    def __init__(self):
        self.metas, self.stanza_configs = tacommon.get_modinput_configs()
        self.metas[tac.app_name] = tac.splunk_ta_aws
        self.server_uri = self.metas[tac.server_uri]
        self.session_key = self.metas[tac.session_key]

    def get_tasks(self):
        if not self.stanza_configs:
            return None

        conf_mgr = cm.ConfManager(self.server_uri, self.session_key)
        settings = conf_mgr.all_stanzas_as_dicts(self.conf_file,
                                                 do_reload=False)
        # set logging level for our logger
        set_log_level(settings[tac.log_stanza][tac.log_level])
        proxy_info = tpc.get_proxy_info(self.session_key)

        tasks, creds = {}, {}
        for stanza in self.stanza_configs:
            input_name = scutil.extract_datainput_name(stanza[tac.name])
            with logging.LogContext(datainput=input_name):
                stanza[tac.interval] = tacommon.get_interval(stanza, 60)
                stanza[tac.polling_interval] = stanza[tac.interval]
                stanza[acc.period] = int(stanza[acc.period])
                if stanza[acc.period] > 86400 or stanza[acc.period] < 60:
                    logger.error(
                        "Granularity(period) is not in range [60, 86400], "
                        "ignoring this input.",
                        Period=stanza[acc.period],
                        ErrorCode="ConfigurationError",
                        ErrorDetail="Invalid granularity(period): out of "
                                    "the range [60, 86400].")
                    continue

                if stanza[tac.polling_interval] % stanza[acc.period]:
                    logger.error(
                        "Polling interval is not a multiple of period, "
                        "ignoring this input.",
                        Period=stanza[acc.period],
                        ErrorCode="ConfigurationError",
                        ErrorDetail="Polling interval should be a multiple "
                                    "of granularity(period).")
                    continue

                stanza[tac.datainput] = input_name
                stanza[tac.sourcetype] = stanza.get(tac.sourcetype,
                                                    "aws:cloudwatch")
                metric_names = stanza[acc.metric_names].strip()
                if metric_names != ".*":
                    metric_names = json.loads(metric_names)
                else:
                    metric_names = None
                stanza[acc.metric_names] = metric_names
                stanza[acc.metric_dimensions] = json.loads(
                    stanza[acc.metric_dimensions])
                stanza[acc.statistics] = json.loads(stanza[acc.statistics])
                stanza[tac.log_level] = settings[tac.log_stanza][tac.log_level]
                stanza[tac.aws_account] = stanza.get('aws_account')
                stanza[tac.aws_iam_role] = stanza.get('aws_iam_role')
                stanza[acc.use_metric_format] = scutil.is_true(
                    stanza.get(acc.use_metric_format, False))
                stanza.update(self.metas)
                stanza.update(proxy_info)
                stanza.update(settings[tac.global_settings])
                stanza[tac.use_hec] = scutil.is_true(
                    stanza.get(tac.use_hec, False))
                stanza[acc.max_api_saver_time] = \
                    int(stanza.get(acc.max_api_saver_time, 7200))

                region_tasks = {}
                tasks[stanza[tac.datainput]] = region_tasks
                for region in stanza[tac.aws_region].split(","):
                    region = region.strip()
                    if not region:
                        continue
                    task = {}
                    task.update(stanza)
                    task[tac.aws_region] = region
                    task[tac.region] = region
                    num, rtasks = self._expand_task(task)
                    if rtasks:
                        region_tasks[region] = rtasks
                    stanza[region] = num
                if not region_tasks:
                    logger.warning("No metric/dimension has been found.")

        all_tasks = []
        for region_tasks in tasks.itervalues():
            for rtasks in region_tasks.itervalues():
                all_tasks.extend(rtasks)
        tacommon.handle_hec(all_tasks, "aws_cloudwatch")
        return all_tasks

    @staticmethod
    def _get_batch_size(total_num):
        min_batch_size = 10
        min_batch = 10
        max_batch_env = int(os.environ.get("aws_cloudwatch_max_batch", "200"))
        max_batch = min(max_batch_env,
                        64 * acdl.AwsDataLoaderManager.cpu_for_workers())

        if total_num <= min_batch_size:
            return total_num

        if total_num <= min_batch * min_batch_size:
            return min_batch_size

        batch_size = min_batch_size
        while 1:
            if total_num / batch_size > max_batch:
                batch_size = int(batch_size * 1.5)
            else:
                break
        return int(batch_size)

    def _expand_task(self, task):
        metrics = get_metrics(task)
        if not metrics:
            return 0, []

        total = len(metrics)
        batch_size = self._get_batch_size(total)
        logger.info(
            "Discovered total=%s metrics and dimensions in namespace=%s, "
            "region=%s for datainput=%s, batchsize=%s",
            total, task[acc.metric_namespace], task[tac.region],
            task[tac.datainput], batch_size)

        batched_tasks = []
        for i in range(total / batch_size):
            batched_tasks.append(metrics[i * batch_size:(i + 1) * batch_size])

        # Last batch
        if total > batch_size and total % batch_size < batch_size / 4:
            # Avoid a too-small batch: fold the remainder into the last one
            begin = total / batch_size * batch_size
            last_small_batch = metrics[begin:total]
            batched_tasks[-1].extend(last_small_batch)
        else:
            last_pos = total / batch_size * batch_size
            batched_tasks.append(metrics[last_pos:total])

        expanded_tasks = []
        for batch in batched_tasks:
            if not batch:
                continue
            new_task = dict(task)
            new_task[acc.metric_configs] = batch
            new_task[tac.aws_service] = tac.cloudwatch
            expanded_tasks.append(new_task)
        return total, expanded_tasks
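# Worked example of the batching arithmetic in _get_batch_size above
# (standalone sketch, no Splunk dependencies; assumes the default
# aws_cloudwatch_max_batch of 200 and enough worker CPUs that the env cap
# wins): batch_size grows by 1.5x until total // batch_size no longer
# exceeds max_batch.
def sketch_batch_size(total, max_batch=200, min_batch_size=10, min_batch=10):
    if total <= min_batch_size:
        return total
    if total <= min_batch * min_batch_size:
        return min_batch_size
    batch_size = min_batch_size
    while total // batch_size > max_batch:
        batch_size = int(batch_size * 1.5)
    return batch_size

assert sketch_batch_size(8) == 8      # tiny inputs: one batch
assert sketch_batch_size(100) == 10   # up to 100 metrics: fixed batch of 10
assert sketch_batch_size(5000) == 33  # grown until 5000 // 33 <= 200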
class AWSKinesisConf(object):
    app_dir = scutil.get_app_path(op.abspath(__file__))
    app_file = op.join(app_dir, "local", "app.conf")
    passwords = "passwords"
    passwords_file_w_path = op.join(app_dir, "local", passwords + ".conf")
    task_file = "aws_kinesis_tasks"
    task_file_w_path = op.join(app_dir, "local", task_file + ".conf")
    conf_file = "aws_kinesis"
    conf_file_w_path = op.join(app_dir, "local", conf_file + ".conf")

    def __init__(self):
        self.metas, self.stanza_configs = tacommon.get_modinput_configs()
        self.metas[tac.app_name] = tac.splunk_ta_aws

    def get_tasks(self):
        conf_mgr = cm.ConfManager(self.metas[tac.server_uri],
                                  self.metas[tac.session_key])
        all_tasks = self._get_kinesis_tasks(conf_mgr)
        settings = conf_mgr.all_stanzas_as_dicts(self.conf_file,
                                                 do_reload=False)
        # set logging level for our logger
        set_log_level(settings[tac.log_stanza][tac.log_level])
        for task in all_tasks:
            task[tac.log_level] = settings[tac.log_stanza][tac.log_level]
            task.update(settings[tac.global_settings])
        ackpt.clean_up_ckpt_for_deleted_data_input(all_tasks)
        return tacommon.handle_hec(all_tasks, "aws_kinesis")

    def _get_kinesis_tasks(self, conf_mgr):
        proxy_info = tpc.get_proxy_info(self.metas[tac.session_key])
        stanzas = conf_mgr.all_stanzas(self.task_file, do_reload=False)
        tasks, creds = [], {}
        for stanza in stanzas:
            if scutil.is_true(stanza[tac.disabled]):
                continue
            # Normalize tac.account to tac.aws_account
            stanza[tac.aws_account] = stanza.get(tac.account)
            stanza[tac.aws_iam_role] = stanza.get(tac.aws_iam_role)
            tasks.extend(self._expand_tasks(stanza, creds, proxy_info))
        return tasks

    def _expand_tasks(self, stanza, creds, proxy_info):
        names = stanza[akc.stream_names].split(",")
        stream_names = []
        for name in names:
            name = name.strip()
            if name:
                stream_names.append(name)

        stanza.update(proxy_info)
        stanza.update(self.metas)
        try:
            client = akcommon.KinesisClient(stanza, logger)
            streams = client.describe_streams(stream_names)
        except Exception as e:
            if "ResourceNotFoundException" in e.message:
                logger.info(
                    "stream=%s in region=%s has been deleted, please "
                    "delete datainput=%s", stream_names, stanza[tac.region],
                    stanza[tac.stanza_name])
            else:
                logger.error(
                    "Failed to describe stream=%s in region=%s for "
                    "datainput=%s, ignoring this datainput. error=%s",
                    stream_names, stanza[tac.region],
                    stanza[tac.stanza_name], traceback.format_exc())
            return []

        tasks = []
        for stream_name in stream_names:
            for shard in streams[stream_name]["Shards"]:
                task = copy.copy(stanza)
                task[tac.datainput] = task[tac.stanza_name]
                task[tac.aws_service] = tac.kinesis
                task[akc.stream_name] = stream_name
                task[akc.shard_id] = shard["ShardId"]
                tasks.append(task)
        return tasks
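# Shape of the shard fan-out in AWSKinesisConf._expand_tasks above: one task
# per shard per stream. The describe response below is a minimal illustrative
# fixture, not real KinesisClient output.
streams = {
    "orders": {"Shards": [{"ShardId": "shardId-000000000000"},
                          {"ShardId": "shardId-000000000001"}]},
}
fanned_out = [
    {"stream_name": name, "shard_id": shard["ShardId"]}
    for name, info in streams.items()
    for shard in info["Shards"]
]
assert len(fanned_out) == 2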