def _iter_feature_terms(self, feature_def, dt_range):
    """Yield paired (output, sensing) datetime ranges for one feature.

    For each unit window inside *dt_range*, yields a 2-tuple:
    the output term ``(dts, dte - bin)`` and the (possibly wider)
    sensing term padded by the convolution radius.

    :param feature_def: dict describing the feature; may contain
        "func_list", "convolve_radius", "data_range", "sense_range".
    :param dt_range: (start_dt, end_dt) tuple of the whole term.
    """
    # Avoid convolve boundary problem: pad the sensing range by the
    # convolution radius (in bins) so edge bins get full context.
    if "convolve" in feature_def["func_list"]:
        if "convolve_radius" in feature_def:
            convolve_radius = feature_def["convolve_radius"]
        else:
            # compatibility for configparser-style rule
            convolve_radius = self.conf.getint("general",
                                               "evdb_convolve_radius")
    else:
        convolve_radius = 0
    sense_offset = self._feature_bin_size * convolve_radius

    # datetimeindex.get_loc includes stop time (unlike other types!)
    # dtindex_offset remove the stop time
    dtindex_offset = self._feature_bin_size

    if "data_range" in feature_def:
        unit_term = config.str2dur(feature_def["data_range"])
        if "sense_range" in feature_def:
            unit_diff = config.str2dur(feature_def["sense_range"])
        else:
            unit_diff = unit_term
    else:
        # compatibility for configparser-style rule
        unit_term = config.getdur(self.conf, "general", "evdb_unit_term")
        unit_diff = config.getdur(self.conf, "general", "evdb_unit_diff")

    for dts, dte in dtutil.iter_term(dt_range, unit_diff):
        # clamp sensing start so it never precedes the whole term
        sense_dts = max(dt_range[0], dte - unit_term)
        yield ((dts, dte - dtindex_offset),
               (sense_dts - sense_offset, dte - dtindex_offset + sense_offset))
def _recur(dt, host, event_name):
    """Probabilistically schedule a recurrence of an event.

    NOTE(review): this is a closure — ``self``, ``section`` and
    ``_add_event`` are free variables bound in the enclosing scope.

    :param dt: base datetime of the original event.
    :param host: host name of the original event.
    :param event_name: identifier of the original event.
    """
    # No "recurrence" option configured for this section -> nothing to do.
    if not self.conf.has_option(section, "recurrence"):
        return
    if self.conf.getboolean(section, "recurrence"):
        # Recur with probability recur_p, delayed by a random duration
        # drawn between recur_dur_min and recur_dur_max.
        if random.random() < self.conf.getfloat(section, "recur_p"):
            durmin = config.getdur(self.conf, section, "recur_dur_min")
            durmax = config.getdur(self.conf, section, "recur_dur_max")
            new_dt = self._dt_delta_rand(dt, durmin, durmax)
            _add_event(new_dt, host, event_name)
def all_args(conf):
    """Build the list of (conf, (top_dt, end_dt), area) argument tuples
    covering the whole term with sliding unit windows.

    :param conf: configparser-style configuration object.
    :return: list of (conf, (top_dt, end_dt), area) tuples.
    """
    w_top_dt, w_end_dt = config.getterm(conf, "dag", "whole_term")
    term = config.getdur(conf, "dag", "unit_term")
    diff = config.getdur(conf, "dag", "unit_diff")
    # Hoisted out of the loop: the area list does not depend on the
    # current window (previously re-read on every iteration).
    l_area = config.getlist(conf, "dag", "area")

    l_args = []
    top_dt = w_top_dt
    while top_dt < w_end_dt:
        end_dt = top_dt + term
        for area in l_area:
            l_args.append((conf, (top_dt, end_dt), area))
        top_dt = top_dt + diff
    return l_args
def __init__(self, conf, dry=False):
    """Initialize the log event loader.

    :param conf: configparser-style configuration object.
    :param dry: if True, run without writing to the event DB.
    """
    super().__init__(conf, dry=dry)
    src = conf["general"]["log_source"]
    if src == "amulog":
        from . import src_amulog
        args = [
            config.getterm(conf, "general", "evdb_whole_term"),
            conf["database_amulog"]["source_conf"],
            conf["database_amulog"]["event_gid"],
            conf.getboolean("database_amulog", "use_anonymize_mapping")
        ]
        self.source = src_amulog.AmulogLoader(*args)
    else:
        # only the amulog backend is implemented for log_source
        raise NotImplementedError
    self._filter_rules = config.getlist(conf, "filter", "rules")
    for method in self._filter_rules:
        # every configured rule must map to a known filter function
        assert method in filter_log.FUNCTIONS

    self.evdb = self._init_evdb(conf, "log_dbname")
    # dst = conf["general"]["evdb"]
    # if dst == "influx":
    #     dbname = conf["database_influx"]["log_dbname"]
    #     from . import influx
    #     self.evdb = influx.init_influx(conf, dbname, df=False)
    #     # self.evdb_df = influx.init_influx(conf, dbname, df = True)
    # else:
    #     raise NotImplementedError

    # log filter is only constructed when filter rules are configured
    self._lf = None
    if len(self._filter_rules) > 0:
        self._lf = filter_log.init_logfilter(conf, self.source)
    self._feature_unit_diff = config.getdur(conf, "general",
                                            "evdb_unit_diff")
    self._given_amulog_database = \
        conf["database_amulog"]["given_amulog_database"]
def init_logfilter(conf, source):
    """Construct a LogFilter from the [filter] section of *conf*.

    :param conf: configparser-style configuration object.
    :param source: event source object handed to the LogFilter.
    :return: configured LogFilter instance.
    """
    def _rule_pairs(option):
        # parse "<range>_<binsize>" strings into (timedelta, timedelta)
        return [tuple(config.str2dur(part) for part in cond.split("_"))
                for cond in config.gettuple(conf, "filter", option)]

    kwargs = dict(conf["filter"])
    kwargs["pre_count"] = conf.getint("filter", "pre_count")
    kwargs["pre_term"] = config.getdur(conf, "filter", "pre_term")

    kwargs["fourier_sample_rule"] = _rule_pairs("fourier_sample_rule")
    kwargs["fourier_th_spec"] = conf.getfloat("filter", "fourier_th_spec")
    kwargs["fourier_th_eval"] = conf.getfloat("filter", "fourier_th_eval")
    kwargs["fourier_th_restore"] = conf.getfloat("filter",
                                                 "fourier_th_restore")
    kwargs["fourier_peak_order"] = conf.getint("filter",
                                               "fourier_peak_order")

    kwargs["corr_sample_rule"] = _rule_pairs("corr_sample_rule")
    kwargs["corr_th"] = conf.getfloat("filter", "corr_th")
    kwargs["corr_diff"] = [config.str2dur(diffstr) for diffstr
                           in config.gettuple(conf, "filter", "corr_diff")]

    kwargs["linear_sample_rule"] = _rule_pairs("linear_sample_rule")
    kwargs["linear_count"] = conf.getint("filter", "linear_count")
    kwargs["linear_th"] = conf.getfloat("filter", "linear_th")
    return LogFilter(source, **kwargs)
def __init__(self, conf, dry=False):
    """Initialize the log event loader (influx-backed variant).

    :param conf: configparser-style configuration object.
    :param dry: if True, run without writing to the event DB.
    """
    self.conf = conf
    self.dry = dry
    src = conf["general"]["log_source"]
    if src == "amulog":
        from . import source_amulog
        args = [config.getterm(conf, "general", "evdb_whole_term"),
                conf["database_amulog"]["source_conf"],
                conf["database_amulog"]["event_gid"]]
        self.source = source_amulog.AmulogLoader(*args)
    else:
        # only the amulog backend is implemented for log_source
        raise NotImplementedError
    self._filter_rules = config.getlist(conf, "filter", "rules")
    for method in self._filter_rules:
        # every configured rule must map to a known filter function
        assert method in filter_log.FUNCTIONS

    dst = conf["general"]["evdb"]
    if dst == "influx":
        dbname = conf["database_influx"]["log_dbname"]
        from . import influx
        self.evdb = influx.init_influx(conf, dbname, df=False)
        # self.evdb_df = influx.init_influx(conf, dbname, df = True)
    else:
        # only the influx backend is implemented for evdb
        raise NotImplementedError

    self._lf = filter_log.init_logfilter(conf, self.source)
    self._feature_unit_diff = config.getdur(conf, "general",
                                            "evdb_unit_diff")
def __init__(self, conf):
    """Read RRD access settings from the [database_rrd] section.

    :param conf: configparser-style configuration object.
    """
    section = "database_rrd"
    self._rows = conf.getint(section, "rows")
    self._cf = conf[section]["cf"]
    self._correct_roundup = conf.getboolean(section, "correct_roundup")
    # bin size is stored as whole seconds
    binsize_td = config.getdur(conf, section, "binsize")
    self._binsize = int(binsize_td.total_seconds())
def _generate_log(self, event):
    """Generate log lines for one event and append them to self.l_log.

    :param event: tuple of (datetime, host, event_name, info-dict).
        ``info`` supplies values for ``$var$`` placeholders in the
        configured log format.
    :raises ValueError: if a placeholder is neither in ``info`` nor one
        of the builtin "pid" / "host" variables.
    """
    dt = event[0]
    host = event[1]
    event_name = event[2]
    info = event[3]
    for log_name in config.gettuple(self.conf, "event_" + event_name,
                                    "logs"):
        section = "log_" + log_name
        mode = self.conf.get(section, "mode")
        form = self.conf.get(section, "format")
        # expand $var$ placeholders one at a time until none remain
        mes = form
        while True:
            match = self._var_re.search(mes)
            if match is None:
                break
            var_type = match.group().strip("$")
            if var_type in info.keys():
                var_string = info[var_type]
            elif var_type == "pid":
                # fake process id in the usual 16-bit pid range
                var_string = str(random.randint(1, 65535))
            elif var_type == "host":
                var_string = host
            else:
                raise ValueError
            mes = "".join(
                (mes[:match.start()] + var_string + mes[match.end():]))
        if mode == "each":
            # emit the message as-is at the event time
            self.l_log.append((dt, host, mes))
        elif mode == "delay_rand":
            # emit with a random delay in [delay_min, delay_max]
            delay_min = config.getdur(self.conf, section, "delay_min")
            delay_max = config.getdur(self.conf, section, "delay_max")
            log_dt = self._dt_delta_rand(dt, delay_min, delay_max)
            self.l_log.append((log_dt, host, mes))
        elif mode == "drop_rand":
            # drop the message with probability drop_p
            drop_p = self.conf.getfloat(section, "drop_p")
            if random.random() > drop_p:
                self.l_log.append((dt, host, mes))
        elif mode == "other_host_rand":
            # emit on a random *other* host of the configured groups
            l_host = []
            for t_group in config.gettuple(self.conf, section, "groups"):
                for t_host in self.d_host[t_group]:
                    if not t_host == host:
                        l_host.append(t_host)
            self.l_log.append((dt, random.choice(l_host), mes))
def load_event_log_all(conf, dt_range, area, binarize, d_el=None):
    """Yield (evdef, dataframe) for every log event series in *area*.

    :param conf: configparser-style configuration object.
    :param dt_range: (start_dt, end_dt) of the term to load.
    :param area: area name used to select event definitions.
    :param binarize: passed through to load_event.
    :param d_el: optional mapping of pre-built event loaders.
    """
    if d_el is None:
        from .source import evgen_log
        loader = evgen_log.LogEventLoader(conf)
    else:
        loader = d_el[SRCCLS_LOG]
    bin_method = conf.get("dag", "ci_bin_method")
    bin_size = config.getdur(conf, "dag", "ci_bin_size")
    bin_diff = config.getdur(conf, "dag", "ci_bin_diff")
    for evdef in loader.iter_evdef(dt_range, area):
        measure, tags = evdef.series()
        df = load_event(measure, tags, dt_range, bin_size, bin_diff,
                        bin_method, binarize, loader)
        if df is not None:
            yield evdef, df
def make_tsdb(ns):
    """Build the time-series DB for every unit term, timing the task.

    :param ns: argparse namespace with ``parallel`` and config options.
    """
    from . import tsdb
    conf = open_logdag_config(ns)
    term = config.getdur(conf, "database_ts", "unit_term")
    diff = config.getdur(conf, "database_ts", "unit_diff")
    l_args = arguments.all_terms(conf, term, diff)

    timer = common.Timer("mk-tsdb task", output=_logger)
    timer.start()
    n_workers = ns.parallel
    if n_workers > 1:
        # parallel variant of the converter
        for args in l_args:
            tsdb.log2ts_pal(*args, pal=n_workers)
    else:
        for args in l_args:
            tsdb.log2ts(*args)
    timer.stop()
def load_merged_events(conf, dt_range, area, l_evdef, d_el):
    """for visualization

    Load each event definition as one column and merge them into a
    single DataFrame (columns are the positional indexes of l_evdef).

    :raises ValueError: if any event has no time-series in the term.
    """
    areatest = AreaTest(conf)
    ci_bin_method = conf.get("dag", "ci_bin_method")
    ci_bin_size = config.getdur(conf, "dag", "ci_bin_size")
    ci_bin_diff = config.getdur(conf, "dag", "ci_bin_diff")

    frames = []
    for idx, evdef in enumerate(l_evdef):
        df = _load_merged_event(conf, dt_range, area, evdef, areatest,
                                ci_bin_size, ci_bin_diff,
                                ci_bin_method, d_el)
        if df is None:
            raise ValueError("no time-series for {0}".format(evdef))
        df.columns = [idx]
        frames.append(df)
    return pd.concat(frames, axis=1)
def load_values(conf, gid, host, dt_range):
    """Load the discretized event time-series for one (gid, host).

    :param conf: configparser-style configuration object.
    :param gid: group id of the event.
    :param host: host name of the event.
    :param dt_range: (start_dt, end_dt) of the term to load.
    :return: discretized array produced by the configured bin method.
    :raises ValueError: if ci_bin_method is not one of
        "sequential", "slide", "radius".
    """
    method = conf.get("dag", "ci_bin_method")
    ci_bin_size = config.getdur(conf, "dag", "ci_bin_size")
    ci_bin_diff = config.getdur(conf, "dag", "ci_bin_diff")
    td = tsdb.TimeSeriesDB(conf)
    kwargs = {"dts": dt_range[0],
              "dte": dt_range[1],
              "gid": gid,
              "host": host}
    l_dt = list(td.iter_ts(**kwargs))
    if method == "sequential":
        array = dtutil.discretize_sequential(l_dt, dt_range,
                                             ci_bin_size, False)
    elif method == "slide":
        array = dtutil.discretize_slide(l_dt, dt_range, ci_bin_diff,
                                        ci_bin_size, False)
    elif method == "radius":
        ci_bin_radius = 0.5 * ci_bin_size
        array = dtutil.discretize_radius(l_dt, dt_range, ci_bin_diff,
                                         ci_bin_radius, False)
    else:
        # previously fell through to a NameError on "array"
        raise ValueError("unknown ci_bin_method: {0}".format(method))
    return array
def load_event_log_all(conf, dt_range, area, d_el=None):
    """Yield (evdef, dataframe) for log events whose host is in *area*.

    :param conf: configparser-style configuration object.
    :param dt_range: (start_dt, end_dt) of the term to load.
    :param area: area name checked against each event's host tag.
    :param d_el: optional mapping of pre-built event loaders.
    """
    if d_el is None:
        from .source import evgen_log
        loader = evgen_log.LogEventLoader(conf)
    else:
        loader = d_el[SRCCLS_LOG]
    area_checker = AreaTest(conf)
    bin_method = conf.get("dag", "ci_bin_method")
    bin_size = config.getdur(conf, "dag", "ci_bin_size")
    bin_diff = config.getdur(conf, "dag", "ci_bin_diff")
    for evdef in loader.iter_evdef(dt_range):
        measure, tags = evdef.series()
        # skip events whose host is outside the requested area
        if not area_checker.test(area, tags["host"]):
            continue
        df = load_event(measure, tags, dt_range, bin_size, bin_diff,
                        bin_method, loader)
        if df is not None:
            yield evdef, df
def load_event_snmp_all(conf, dt_range, area, d_el=None):
    """Yield (evdef, dataframe) for SNMP features whose host is in *area*.

    :param conf: configparser-style configuration object.
    :param dt_range: (start_dt, end_dt) of the term to load.
    :param area: area name checked against each event's host tag.
    :param d_el: optional mapping of pre-built event loaders.
    """
    if d_el is None:
        from .source import evgen_snmp
        loader = evgen_snmp.SNMPEventLoader(conf)
    else:
        loader = d_el["snmp"]
    area_checker = AreaTest(conf)
    bin_method = conf.get("dag", "ci_bin_method")
    bin_size = config.getdur(conf, "dag", "ci_bin_size")
    bin_diff = config.getdur(conf, "dag", "ci_bin_diff")
    # empty "snmp_features" option means: use every available feature
    feature_names = config.getlist(conf, "dag", "snmp_features")
    if len(feature_names) == 0:
        feature_names = loader.all_feature()
    for evdef in loader.iter_evdef(feature_names):
        measure, tags = evdef.series()
        if not area_checker.test(area, tags["host"]):
            continue
        df = load_event(measure, tags, dt_range, bin_size, bin_diff,
                        bin_method, loader)
        if df is not None:
            yield evdef, df
def all_args(conf):
    """Build (conf, term, area) argument tuples, expanding "each" areas.

    Like the plain variant, but the special area name "each" is expanded
    into one "host_<name>" pseudo-area per host observed in the window.

    :param conf: configparser-style configuration object.
    :return: list of (conf, (top_dt, end_dt), area) tuples.
    """
    amulog_conf = config.open_config(conf["database_amulog"]["source_conf"])
    from amulog import log_db
    ld = log_db.LogData(amulog_conf)
    w_top_dt, w_end_dt = config.getterm(conf, "dag", "whole_term")
    term = config.getdur(conf, "dag", "unit_term")
    diff = config.getdur(conf, "dag", "unit_diff")

    l_args = []
    top_dt = w_top_dt
    while top_dt < w_end_dt:
        end_dt = top_dt + term
        # NOTE: re-read each window on purpose — the list is mutated
        # below with a per-window host expansion.
        l_area = config.getlist(conf, "dag", "area")
        if "each" in l_area:
            l_area.pop(l_area.index("each"))
            # one pseudo-area per host active in this window
            l_area += [
                "host_" + host for host in ld.whole_host(top_dt, end_dt)
            ]
        for area in l_area:
            l_args.append((conf, (top_dt, end_dt), area))
        top_dt = top_dt + diff
    return l_args
def make_evdb_log_all(ns):
    """Populate the event DB from logs over every unit term.

    :param ns: argparse namespace with ``org`` and ``dry`` options.
    """
    conf = open_logdag_config(ns)
    dump_org = ns.org
    dry = ns.dry

    timer = common.Timer("make-evdb-log task", output=_logger)
    timer.start()
    from . import evgen_log
    whole_term = config.getterm(conf, "general", "evdb_whole_term")
    step = config.getdur(conf, "general", "evdb_unit_diff")
    loader = evgen_log.LogEventLoader(conf, dry=dry)
    for dt_range in dtutil.iter_term(whole_term, step):
        loader.read(dt_range, dump_org=dump_org)
        timer.lap_diff("{0}".format(dt_range))
    timer.stop()
def draw_graph_diff(ns):
    """Draw the edge-set diff of two configurations for one unit term.

    :param ns: argparse namespace with ``confs``, ``debug``,
        ``timestr`` and ``filename`` options.
    """
    conf_fn1, conf_fn2 = ns.confs
    conf1 = arguments.open_logdag_config(conf_fn1)
    conf2 = arguments.open_logdag_config(conf_fn2)

    level = logging.DEBUG if ns.debug else logging.INFO
    am_logger = logging.getLogger("amulog")
    config.set_common_logging(conf1, logger=[_logger, am_logger], lv=level)

    # the term starts at the given timestamp and spans one unit term
    dts = dtutil.shortstr2dt(ns.timestr)
    dte = dts + config.getdur(conf1, "dag", "unit_term")
    output = ns.filename

    from . import comparison
    cevmap, cgraph = comparison.edge_set_diff(conf1, conf2, (dts, dte))
    from . import draw
    rgraph = draw.relabel_nodes(cgraph, cevmap)
    draw.graph_nx(output, rgraph)
    print(output)
def filter_periodic(conf, ld, l_dt, dt_range, evdef, method):
    """Return True and the interval if a_cnt is periodic.

    :param conf: configparser-style configuration object.
    :param ld: log data source used to reload wider time-series.
    :param l_dt: list of event datetimes inside dt_range.
    :param dt_range: (start_dt, end_dt) of the inspected term.
    :param evdef: event definition used when reloading the series.
    :param method: "remove", "replace" or "corr" periodicity test.
    :return: (flag, remain_dt, interval); remain_dt is only set by
        the "replace" method, interval is the detected period.
    :raises NotImplementedError: for unknown *method* values.
    """
    ret_false = False, None, None
    gid_name = conf.get("dag", "event_gid")
    p_cnt = conf.getint("filter", "pre_count")
    p_term = config.getdur(conf, "filter", "pre_term")

    # preliminary test: too few events or too narrow a range cannot
    # be meaningfully tested for periodicity
    if len(l_dt) < p_cnt:
        _logger.debug("time-series count too small, skip")
        return ret_false
    elif max(l_dt) - min(l_dt) < p_term:
        _logger.debug("time-series range too small, skip")
        return ret_false

    # periodicity test over each configured (range, binsize) rule
    for dt_cond in config.gettuple(conf, "filter", "sample_rule"):
        dt_length, binsize = [config.str2dur(s) for s in dt_cond.split("_")]
        if (dt_range[1] - dt_range[0]) == dt_length:
            # the given series already matches the rule's range
            temp_l_dt = l_dt
        else:
            # otherwise reload the series for the rule's range
            temp_l_dt = reload_ts(ld, evdef, dt_length, dt_range, gid_name)
        a_cnt = dtutil.discretize_sequential(temp_l_dt, dt_range,
                                             binsize, binarize=False)

        remain_dt = None
        if method == "remove":
            flag, interval = period.fourier_remove(conf, a_cnt, binsize)
        elif method == "replace":
            flag, remain_array, interval = period.fourier_replace(
                conf, a_cnt, binsize)
            if remain_array is not None:
                # convert the surviving bins back to datetimes
                remain_dt = revert_event(remain_array, dt_range, binsize)
        elif method == "corr":
            flag, interval = period.periodic_corr(conf, a_cnt, binsize)
        else:
            raise NotImplementedError
        if flag:
            return flag, remain_dt, interval
    return ret_false
def draw_graph_diff(ns):
    """Draw the edge-set diff of two configuration files for one term.

    :param ns: argparse namespace with ``confs`` (exactly two config
        paths), ``debug``, ``timestr`` and ``filename`` options.
    """
    l_conffp = ns.confs
    assert len(l_conffp) == 2

    # PEP 8 (E731): use a named function instead of a lambda assignment
    def _openconf(path):
        return config.open_config(
            path, ex_defaults=[arguments.DEFAULT_CONFIG])

    conf1, conf2 = [_openconf(c) for c in l_conffp]
    lv = logging.DEBUG if ns.debug else logging.INFO
    am_logger = logging.getLogger("amulog")
    config.set_common_logging(conf1, logger=[_logger, am_logger], lv=lv)

    # the term starts at the given timestamp and spans one unit term
    dts = dtutil.shortstr2dt(ns.timestr)
    dte = dts + config.getdur(conf1, "dag", "unit_term")
    output = ns.filename

    from . import comp_conf
    cevmap, cgraph = comp_conf.edge_set_diff(conf1, conf2, (dts, dte))

    from . import draw
    rgraph = draw.relabel_graph(conf1, cgraph, cevmap)
    draw.graph_nx(output, rgraph)
    print(output)
def filter_linear(conf, l_dt, dt_range):
    """Return True if a_cnt appear linearly.

    Compares the cumulative event count against an ideal straight line
    and accepts when the mean squared deviation is below the threshold.

    :param conf: configparser-style configuration object.
    :param l_dt: list of event datetimes inside dt_range.
    :param dt_range: (start_dt, end_dt) of the inspected term.
    """
    binsize = config.getdur(conf, "filter", "linear_binsize")
    threshold = conf.getfloat("filter", "linear_threshold")
    th_count = conf.getint("filter", "linear_count")
    if len(l_dt) < th_count:
        return False

    # generate time-series cumulative sum
    total_seconds = (dt_range[1] - dt_range[0]).total_seconds()
    bin_seconds = binsize.total_seconds()
    bins = math.ceil(1.0 * total_seconds / bin_seconds)
    cumulative = np.array([0] * int(bins))
    for dt in l_dt:
        idx = int((dt - dt_range[0]).total_seconds() / bin_seconds)
        assert idx < len(cumulative)
        cumulative[idx:] += 1

    # ideal straight line from 0 to the total event count
    ideal = np.linspace(0, len(l_dt), bins, endpoint=False)
    score = sum((cumulative - ideal) ** 2) / (bins * len(l_dt))
    return score < threshold
def jobname2args(name, conf):
    """Recover the (conf, (dts, dte), area) tuple from a job name.

    Job names have the form "<area>_<short-timestamp>".

    :param name: job name string.
    :param conf: configparser-style configuration object.
    """
    area, dtstr = name.split("_", 1)
    dts = dtutil.shortstr2dt(dtstr)
    # the term spans one configured unit term from the parsed start
    dte = dts + config.getdur(conf, "dag", "unit_term")
    return conf, (dts, dte), area
def __init__(self, conf, parallel=None, dry=False):
    """Initialize the SNMP event loader.

    :param conf: configparser-style configuration object.
    :param parallel: degree of parallelism (None for sequential).
    :param dry: if True, run without writing to the event DB.
    """
    super().__init__(conf, dry=dry)
    self.parallel = parallel
    self._srcdb = conf["general"]["snmp_source"]
    if self._srcdb == "rrd":
        from . import src_rrd
        self.source = src_rrd.RRDLoader(conf)
    elif self._srcdb == "influx":
        source_dbname = conf["database_influx"]["snmp_source_dbname"]
        from . import influx
        self.source = influx.init_influx(conf, source_dbname, df=False)
    else:
        # only rrd and influx sources are implemented
        raise NotImplementedError

    self.evdb = self._init_evdb(conf, "snmp_dbname")
    # self._dstdb = conf["general"]["evdb"]
    # if self._dstdb == "influx":
    #     dbname = conf["database_influx"]["snmp_dbname"]
    #     from . import influx
    #     self.evdb = influx.init_influx(conf, dbname, df=False)
    # else:
    #     raise NotImplementedError

    self._ha = host_alias.HostAlias(conf["general"]["host_alias_filename"])

    # feature definitions are loaded from an external JSON file
    snmp_def = conf["general"]["snmp_feature_def"]
    with open(snmp_def, "r") as f:
        jsobj = json.load(f)

    # self._d_source: list of dict: seriesdef
    # seriesdef keys: filelist, host, mod_cls, mod_id
    self._d_source = jsobj["source"]
    self._d_vsourcedef = jsobj["vsource"]
    self._init_vsource()

    if isinstance(jsobj["feature"], list):
        self._d_feature = jsobj["feature"]
    elif isinstance(jsobj["feature"], dict):
        # for backward compatibility with configparser-style rule:
        # convert {name: fdef} mapping into a list of fdef dicts
        self._d_feature = []
        for name, fdef in jsobj["feature"].items():
            fdef["name"] = name
            self._d_feature.append(fdef)
    #self._d_feature = jsobj["feature"]

    # reverse index: source name -> list of feature definitions
    self._d_rfeature = defaultdict(list)
    for fdef in self._d_feature:
        src = fdef["source"]
        #d = {"name": fdef["name"],
        #     "column": self._d_feature[name]["column"],
        #     "func_list": self._d_feature[name]["func_list"]}
        self._d_rfeature[src].append(fdef)

    self._feature_unit_term = config.getdur(conf, "general",
                                            "evdb_unit_term")
    self._feature_unit_diff = config.getdur(conf, "general",
                                            "evdb_unit_diff")
    self._feature_bin_size = config.getdur(conf, "general",
                                           "evdb_binsize")
    self._feature_convolve_radius = conf.getint("general",
                                                "evdb_convolve_radius")
    # multiprocessing pool is created lazily
    self._mproc = None
def edge_temporal_sort(ldag, time_condition, search_condition=None,
                       reverse=False, view_context="edge",
                       load_cache=True, graph=None):
    """Render DAG edges sorted by temporal proximity to a reference time.

    Each edge is scored by the average time distance (seconds) of its
    two nodes' event bins from either a point in time ("time") or a
    time range ("time_range"); edges are then printed grouped by score.

    :param ldag: DAG object providing ``graph``, ``conf`` and node
        time-series access.
    :param time_condition: dict with a "time" (datetime) or
        "time_range" ((dts, dte)) key.
    :param search_condition: optional edge filter passed to
        showdag.check_conditions.
    :param reverse: sort descending when True.
    :param view_context: passed through to showdag.edge_view.
    :param load_cache: passed through to showdag.edge_view.
    :param graph: optional graph override (defaults to ldag.graph).
    :return: formatted multi-line string.
    """
    assert "time" in time_condition or "time_range" in time_condition
    if graph is None:
        graph = ldag.graph

    from amulog import config
    ci_bin_size = config.getdur(ldag.conf, "dag", "ci_bin_size")

    nodes = set()
    # nodes with any adjacent edges
    for edge in graph.edges():
        nodes.add(edge[0])
        nodes.add(edge[1])
    df_ts = ldag.node_ts(list(nodes))
    if "time" in time_condition:
        # distance of each bin center (index + half bin) from the point
        dt = time_condition["time"]
        sr_diff_td = (df_ts.index.to_series() + (0.5 * ci_bin_size)
                      - dt).abs()
        sr_diff = sr_diff_td.map(lambda x: x.total_seconds())
        # per-node score: event-count-weighted mean distance
        df_score = df_ts.apply(lambda x: x * sr_diff / sum(x))
    else:
        # distance of each bin center from the range (0 inside it)
        dts, dte = time_condition["time_range"]
        diff = []
        for tmp_ts in df_ts.index:
            ts = tmp_ts + 0.5 * ci_bin_size
            if ts < dts:
                diff.append((dts - ts).total_seconds())
            elif ts > dte:
                diff.append((ts - dte).total_seconds())
            else:
                # dts <= ts <= dte
                diff.append(float(0))
        sr_diff = pd.Series(diff, index=df_ts.index)
        df_score = df_ts.apply(lambda x: x * sr_diff / sum(x))

    items = []
    edges = [
        edge for edge
        in showdag.remove_edge_duplication(ldag.graph.edges(), ldag)
        if showdag.check_conditions(edge, ldag, search_condition)
    ]
    for edge in edges:
        # edge score: mean of its two node scores
        score = (sum(df_score[edge[0]]) + sum(df_score[edge[1]])) / 2
        items.append((edge, score))

    l_buf = []
    prev = None
    for edge, score in sorted(items, key=lambda x: x[1], reverse=reverse):
        if showdag.check_conditions(edge, ldag, search_condition):
            # emit a score header whenever the score value changes
            if score != prev:
                if prev is not None:
                    l_buf.append("")
                l_buf.append("[average_diff_sec={0}]".format(score))
                prev = score
            msg = showdag.edge_view(edge, ldag, context=view_context,
                                    load_cache=load_cache, graph=graph)
            l_buf.append(msg)
    return "\n".join(l_buf)
def _iter_evdb_term(conf):
    """Iterate unit windows covering the whole configured evdb term.

    :param conf: configparser-style configuration object.
    :return: iterator of (start_dt, end_dt) windows.
    """
    whole_term = config.getterm(conf, "general", "evdb_whole_term")
    step = config.getdur(conf, "general", "evdb_unit_diff")
    return dtutil.iter_term(whole_term, step)