def show_diff_edges(ns):
    """Print the edges returned by ``comp_conf.edge_set_diff`` per time range.

    Args:
        ns: Parsed command-line namespace. ``ns.confs`` must hold exactly two
            config paths and ``ns.debug`` selects DEBUG or INFO logging.

    Each edge is printed as ``[gid=.., host = ..] -> [..]`` when
    ``showdag.isdirected`` reports it as directed, and with ``<->`` otherwise.
    Time ranges without any edge are skipped.
    """
    conf_paths = ns.confs
    assert len(conf_paths) == 2
    conf1, conf2 = [
        config.open_config(path, ex_defaults=[arguments.DEFAULT_CONFIG])
        for path in conf_paths
    ]

    level = logging.DEBUG if ns.debug else logging.INFO
    amulog_logger = logging.getLogger("amulog")
    config.set_common_logging(conf1, logger=[_logger, amulog_logger],
                              lv=level)

    from . import comp_conf
    node_fmt = "[gid={0[0]}, host = {0[1]}]"
    manager = arguments.ArgumentManager(conf1)
    manager.load()
    for dt_range in sorted(manager.iter_dt_range()):
        cevmap, cgraph = comp_conf.edge_set_diff(conf1, conf2, dt_range)
        lines = []
        for edge in cgraph.edges():
            src_text = node_fmt.format(cevmap.evdef(edge[0]))
            arrow = " -> " if showdag.isdirected(edge, cgraph) else " <-> "
            dst_text = node_fmt.format(cevmap.evdef(edge[1]))
            lines.append(src_text + arrow + dst_text)
        if lines:
            # Only announce a date when it actually has differing edges.
            print("date: {0}".format(dt_range[0]))
            print("\n".join(lines))
def edge_set_diff(conf1, conf2, dt_range, lor=None):
    """Return the edges that exist under conf1 but not under conf2.

    Args:
        conf1: Config whose exclusive edges are wanted.
        conf2: Config whose edges are subtracted.
        dt_range: Time range used to select DAG arguments.
        lor: Optional precomputed ``(cevmap, cgraph)`` pair as returned by
            ``edge_set_lor``; computed here when omitted.

    Returns:
        Tuple ``(cevmap, cgraph_diff)`` where ``cgraph_diff`` is a new
        ``nx.Graph`` holding every edge of the union graph that is absent
        from the conf2 graph.
    """
    # Union graph (A or B): reuse the caller's result when one is given.
    if lor is not None:
        cevmap, union_graph = lor
    else:
        cevmap, union_graph = edge_set_lor(conf1, conf2, dt_range)

    manager2 = arguments.ArgumentManager(conf2)
    manager2.load()

    # B: all edges of the DAGs selected by conf2 in this time range.
    graph_b = nx.Graph()
    for args in manager2.args_in_time(dt_range):
        dag = showdag.LogDAG(args)
        dag.load()
        graph_b = _add_edges(cevmap, graph_b, dag)

    # A and not B == (A or B) - B
    diff_graph = nx.Graph()
    for edge in union_graph.edges():
        if not graph_b.has_edge(*edge):
            diff_graph.add_edge(*edge)
    return cevmap, diff_graph
def show_diff_direction(ns):
    """Print edges whose direction differs between two configurations.

    Args:
        ns: Parsed command-line namespace. ``ns.confs`` holds two config
            file names, ``ns.debug`` selects the log level and
            ``ns.argname`` optionally restricts the comparison to one job.

    When ``ns.argname`` is None every time range of the first config is
    compared; otherwise only the time range of the named job is compared.
    The total number of differing edges is printed last.
    """
    path1, path2 = ns.confs
    conf1 = arguments.open_logdag_config(path1)
    conf2 = arguments.open_logdag_config(path2)

    level = logging.DEBUG if ns.debug else logging.INFO
    amulog_logger = logging.getLogger("amulog")
    config.set_common_logging(conf1, logger=[_logger, amulog_logger],
                              lv=level)

    def _emit(rows):
        # One line per edge: "<ev1> <dir in conf1> | <dir in conf2> <ev2>"
        for ev1, ev2, di1, di2 in rows:
            print("{0} {1} | {2} {3}".format(ev1, di1, di2, ev2))

    total = 0
    from . import comparison
    manager = arguments.ArgumentManager(conf1)
    manager.load()
    if ns.argname is not None:
        job_args = manager.jobname2args(ns.argname, conf1)
        rows = comparison.edge_direction_diff(conf1, conf2, job_args[2])
        total += len(rows)
        _emit(rows)
    else:
        for dt_range in sorted(manager.iter_dt_range()):
            rows = comparison.edge_direction_diff(conf1, conf2, dt_range)
            total += len(rows)
            if len(rows) == 0:
                continue
            print(dt_range)
            _emit(rows)
            print("")
    print(total)
def show_clusters(conf, feature="edge", weight="idf",
                  clustering_method="kmeans", n_clusters=None, cause_topn=10):
    """Cluster the DAGs of a configuration and describe each cluster as text.

    Args:
        conf: Configuration used to load the argument manager.
        feature: Feature name forwarded to ``init_similarity``.
        weight: Weighting name forwarded to ``init_similarity``.
        clustering_method: Method name forwarded to ``sim.clustering``.
        n_clusters: Number of clusters; defaults to the integer square root
            of the number of argument sets.
        cause_topn: ``topn`` forwarded to ``sim.similarity_causes``.

    Returns:
        A newline-joined report. Each cluster gets a header line, a
        "main components" line when it has more than two jobs, and a
        trailing blank line.
    """
    manager = arguments.ArgumentManager(conf)
    manager.load()
    if n_clusters is None:
        n_clusters = int(math.sqrt(len(manager)))

    sim = init_similarity(conf, feature, am=manager, weight=weight)
    clusters = sim.clustering(clustering_method,
                              cls_kwargs={"n_clusters": n_clusters})

    report = []
    for cid, jobnames in clusters.items():
        header = "[cluster {0}]: {1} ({2})".format(
            cid, jobnames, len(jobnames))
        report.append(header)
        if len(jobnames) > 2:
            causes = list(sim.similarity_causes(jobnames, topn=cause_topn))
            report.append("main components: {0}".format(causes))
        report.append("")
    return "\n".join(report)
def test_load_asis(self):
    """Generate events and DAGs from the test database; expect some edges."""
    conf = config.open_config(arguments.DEFAULT_CONFIG, base_default=False)
    overrides = (
        ("general", "evdb", "sql"),
        ("database_sql", "database", "sqlite3"),
        ("database_amulog", "source_conf", self._path_amulogconf),
        ("database_sql", "sqlite3_filename", self._path_testdb),
        ("filter", "rules", ""),
    )
    for section, option, value in overrides:
        conf[section][option] = value

    from logdag import dtutil
    from logdag.source import evgen_log
    whole_term = self._whole_term
    unit = config.str2dur("1d")
    loader = evgen_log.LogEventLoader(conf)
    for dt_range in dtutil.iter_term(whole_term, unit):
        loader.read(dt_range, dump_org=False)

    manager = arguments.ArgumentManager(conf)
    manager.generate(arguments.all_args)

    from logdag import makedag
    # Total edge count over every generated DAG.
    edge_cnt = sum(
        makedag.makedag_main(args, do_dump=False).number_of_edges()
        for args in manager
    )
    assert edge_cnt > 0
def show_diff_info(ns):
    """Print edge-count statistics comparing two configurations.

    Args:
        ns: Parsed command-line namespace. ``ns.confs`` holds two config
            file names and ``ns.debug`` selects the log level.

    For every time range of the first config, the edge counts of the common,
    union ("lor") and both one-sided difference graphs are accumulated; the
    four totals are printed at the end.
    """
    path1, path2 = ns.confs
    conf1 = arguments.open_logdag_config(path1)
    conf2 = arguments.open_logdag_config(path2)

    level = logging.DEBUG if ns.debug else logging.INFO
    amulog_logger = logging.getLogger("amulog")
    config.set_common_logging(conf1, logger=[_logger, amulog_logger],
                              lv=level)

    from . import comparison
    totals = {"common": 0, "lor": 0, "diff1": 0, "diff2": 0}
    manager = arguments.ArgumentManager(conf1)
    manager.load()
    for dt_range in manager.iter_dt_range():
        _, common_graph = comparison.edge_set_common(conf1, conf2, dt_range)
        totals["common"] += common_graph.number_of_edges()

        union = comparison.edge_set_lor(conf1, conf2, dt_range)
        union_graph = union[1]
        totals["lor"] += union_graph.number_of_edges()

        # Hand the union over so edge_set_diff does not recompute it.
        _, only1_graph = comparison.edge_set_diff(
            conf1, conf2, dt_range, lor=(union[0], union_graph))
        totals["diff1"] += only1_graph.number_of_edges()

        _, only2_graph = comparison.edge_set_diff(
            conf2, conf1, dt_range, lor=(union[0], union_graph))
        totals["diff2"] += only2_graph.number_of_edges()

    print("Logical OR edges: {0}".format(totals["lor"]))
    print("Common edges: {0}".format(totals["common"]))
    print("Edges only found in {0}: {1}".format(ns.confs[0],
                                                 totals["diff1"]))
    print("Edges only found in {0}: {1}".format(ns.confs[1],
                                                 totals["diff2"]))
def search_similar_dag(ldag, feature="edge", weight="idf",
                       dag_topn=10, cause_topn=10):
    """List the DAGs most similar to ``ldag`` as text.

    Args:
        ldag: Reference DAG; its ``conf`` and ``args`` attributes are used.
        feature: Feature name forwarded to ``init_similarity``.
        weight: Weighting name forwarded to ``init_similarity``.
        dag_topn: Output stops once this many rows have been emitted.
        cause_topn: ``topn`` forwarded to ``sim.similarity_causes``.

    Returns:
        Newline-joined rows ``"<similarity> <jobname>: <causes>"`` in
        descending order of similarity.
    """
    manager = arguments.ArgumentManager(ldag.conf)
    manager.load()
    base_name = manager.jobname(ldag.args)
    sim = init_similarity(ldag.conf, feature, am=manager, weight=weight)

    # Similarity of every other job against the reference job.
    similarity_of = {}
    for args in manager:
        other_name = manager.jobname(args)
        if other_name == base_name:
            continue
        similarity_of[other_name] = sim.similarity(base_name, other_name)

    ranked = sorted(similarity_of.items(),
                    key=lambda item: item[1], reverse=True)
    rows = []
    for rank, (other_name, value) in enumerate(ranked, start=1):
        causes = list(sim.similarity_causes([base_name, other_name],
                                            topn=cause_topn))
        rows.append("{0} {1}: {2}".format(value, other_name, causes))
        # The limit is checked after appending, as before.
        if rank >= dag_topn:
            break
    return "\n".join(rows)
def __init__(self, conf, am=None, use_cache=True, weight="none",
             smooth_idf=True):
    """Initialise the instance and obtain its feature matrix.

    Args:
        conf: Configuration object.
        am: Optional argument manager; a new one is created from ``conf``
            and loaded when omitted.
        use_cache: When true, load the matrix from cache if one exists,
            otherwise compute it and dump it to cache. When false, always
            compute the matrix and do not dump it.
        weight: Either ``"none"`` or ``"idf"``.
        smooth_idf: Forwarded to ``EdgeCount``.
    """
    super().__init__(conf, am=am)
    self._conf = conf
    if am is not None:
        self._am = am
    else:
        self._am = arguments.ArgumentManager(conf)
        self._am.load()
    self._all_jobnames = [self._am.jobname(args) for args in self._am]

    assert weight in ("none", "idf")
    self._weight = weight

    self._ec = EdgeCount(conf, am=am, use_cache=use_cache,
                         smooth_idf=smooth_idf)
    self._counter = self._ec  # to be overwritten

    if use_cache and self.has_cache():
        self.load_cache()
        return
    self._matrix = self._space()
    if use_cache:
        self.dump_cache()
def show_diff_edges(ns):
    """Print the edges returned by ``comparison.edge_set_diff`` per time range.

    Args:
        ns: Parsed command-line namespace. ``ns.confs`` holds two config
            file names and ``ns.debug`` selects DEBUG or INFO logging.

    Edges are rendered as ``[gid=.., host = ..] -> [gid=.., host = ..]``
    when ``showdag.isdirected`` is true for them and with ``<->`` otherwise.
    A date header is printed only for time ranges that have edges.
    """
    first_path, second_path = ns.confs
    conf1 = arguments.open_logdag_config(first_path)
    conf2 = arguments.open_logdag_config(second_path)

    if ns.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    amulog_logger = logging.getLogger("amulog")
    config.set_common_logging(conf1, logger=[_logger, amulog_logger],
                              lv=level)

    from . import comparison
    manager = arguments.ArgumentManager(conf1)
    manager.load()

    def _render(cevmap, cgraph, edge):
        # Source first, then direction, then destination.
        parts = ["[gid={0[0]}, host = {0[1]}]".format(cevmap.evdef(edge[0]))]
        if showdag.isdirected(edge, cgraph):
            parts.append(" -> ")
        else:
            parts.append(" <-> ")
        parts.append(
            "[gid={0[0]}, host = {0[1]}]".format(cevmap.evdef(edge[1])))
        return "".join(parts)

    for dt_range in sorted(manager.iter_dt_range()):
        cevmap, cgraph = comparison.edge_set_diff(conf1, conf2, dt_range)
        rendered = [_render(cevmap, cgraph, edge) for edge in cgraph.edges()]
        if len(rendered) == 0:
            continue
        print("date: {0}".format(dt_range[0]))
        print("\n".join(rendered))
def show_diff_direction(ns):
    """Print edges whose direction differs between two configurations.

    Args:
        ns: Parsed command-line namespace. ``ns.confs`` must hold exactly two
            config paths, ``ns.debug`` selects the log level and
            ``ns.argname`` optionally restricts the comparison to one job.

    When ``ns.argname`` is None every time range of the first config is
    compared and non-empty results are printed under a time-range header;
    otherwise only the time range of the named job is compared. The total
    number of differing edges is printed last.
    """
    l_conffp = ns.confs
    assert len(l_conffp) == 2
    conf1, conf2 = [
        config.open_config(c, ex_defaults=[arguments.DEFAULT_CONFIG])
        for c in l_conffp
    ]
    lv = logging.DEBUG if ns.debug else logging.INFO
    am_logger = logging.getLogger("amulog")
    config.set_common_logging(conf1, logger=[_logger, am_logger], lv=lv)

    def _print_diff(rows):
        # One line per edge: "<ev1> <dir in conf1> | <dir in conf2> <ev2>"
        for ev1, ev2, di1, di2 in rows:
            print("{0} {1} | {2} {3}".format(ev1, di1, di2, ev2))

    cnt = 0
    from . import comp_conf
    am = arguments.ArgumentManager(conf1)
    am.load()
    if ns.argname is None:
        for dt_range in sorted(am.iter_dt_range()):
            ret = comp_conf.edge_direction_diff(conf1, conf2, dt_range)
            cnt += len(ret)
            if ret:
                print(dt_range)
                _print_diff(ret)
                print("")
    else:
        # The job name belongs to the first configuration. The previous code
        # passed an undefined name ``conf`` here and raised NameError.
        args = am.jobname2args(ns.argname, conf1)
        dt_range = args[2]
        ret = comp_conf.edge_direction_diff(conf1, conf2, dt_range)
        cnt += len(ret)
        _print_diff(ret)
    print(cnt)
def edge_set_lor(conf1, conf2, dt_range):
    """Build the union of the DAG edges of two configurations.

    Args:
        conf1: First configuration.
        conf2: Second configuration.
        dt_range: Time range used to select DAG arguments in both configs.

    Returns:
        Tuple ``(cevmap, cgraph)``: the shared event definition map and an
        ``nx.Graph`` accumulated from the DAGs of conf1 first, then conf2.
    """
    managers = []
    for conf in (conf1, conf2):
        manager = arguments.ArgumentManager(conf)
        manager.load()
        managers.append(manager)

    cevmap = log2event.EventDefinitionMap()
    cgraph = nx.Graph()
    # Both managers are loaded before any DAG is read, as before.
    for manager in managers:
        for args in manager.args_in_time(dt_range):
            dag = showdag.LogDAG(args)
            dag.load()
            cevmap = _add_nodes(cevmap, dag)
            cgraph = _add_edges(cevmap, cgraph, dag)
    return cevmap, cgraph
def edge_direction_diff(conf1, conf2, dt_range):
    """List common edges whose direction differs between two configurations.

    Args:
        conf1: First configuration.
        conf2: Second configuration.
        dt_range: Time range used to select DAG arguments in both configs.

    Returns:
        A list of ``[ev1, ev2, di1, di2]`` entries, one per edge of
        ``edge_set_common`` whose direction marker differs. Markers are
        ``"->"`` (ev1 to ev2), ``"<-"`` (ev2 to ev1) and ``"-"`` (both).

    Raises:
        ValueError: If a common edge is not found in any DAG of one of the
            configurations.
    """

    def _load_dags(am):
        # Load every DAG of the time range once, not once per compared edge.
        dags = []
        for tmp_args in am.args_in_time(dt_range):
            r = showdag.LogDAG(tmp_args)
            r.load()
            dags.append(r)
        return dags

    def _get_direction(dags, ev1, ev2):
        for r in dags:
            evmap = r._evmap()
            if not (evmap.has_evdef(ev1) and evmap.has_evdef(ev2)):
                continue
            n1, n2 = [r.evdef2node(ev) for ev in (ev1, ev2)]
            forward = (n1, n2) in r.graph.edges()
            backward = (n2, n1) in r.graph.edges()
            if forward and backward:
                return "-"
            if forward:
                return "->"
            if backward:
                return "<-"
            # Both events exist in this DAG but are not adjacent here:
            # keep searching instead of returning an unbound value.
        searched = ", ".join(str(r.name) for r in dags)
        raise ValueError("Edge {0} - {1} not found in {2}".format(
            ev1, ev2, searched))

    am1 = arguments.ArgumentManager(conf1)
    am1.load()
    am2 = arguments.ArgumentManager(conf2)
    am2.load()

    cevmap, cgraph = edge_set_common(conf1, conf2, dt_range)
    edges = list(cgraph.edges())
    if not edges:
        return []

    dags1 = _load_dags(am1)
    dags2 = _load_dags(am2)
    ret = []
    for edge in edges:
        ev1, ev2 = [cevmap.evdef(node) for node in edge]
        di1 = _get_direction(dags1, ev1, ev2)
        di2 = _get_direction(dags2, ev1, ev2)
        if di1 != di2:
            ret.append([ev1, ev2, di1, di2])
    return ret
def edge_diff_gid(conf1, conf2):
    """Collect, per gid, the dates on which it appears in a differing edge.

    Args:
        conf1: Configuration whose time ranges are iterated.
        conf2: Configuration compared against.

    Returns:
        A ``defaultdict(list)`` mapping a gid to the short date strings of
        the time ranges whose ``edge_set_diff`` graph has an edge touching
        that gid. One entry is added per edge endpoint, so dates can repeat.
    """
    dates_by_gid = defaultdict(list)
    manager = arguments.ArgumentManager(conf1)
    manager.load()
    for dt_range in manager.iter_dt_range():
        cevmap, cgraph = edge_set_diff(conf1, conf2, dt_range)
        for edge in cgraph.edges():
            timestr = dtutil.shortstr(dt_range[0])
            # Source endpoint first, then destination.
            for node in (edge[0], edge[1]):
                dates_by_gid[cevmap.evdef(node).gid].append(timestr)
    return dates_by_gid
def edge_diff_gid_search(conf1, conf2, gid):
    """Print every differing edge that touches the given gid.

    Args:
        conf1: Configuration whose time ranges are iterated.
        conf2: Configuration compared against.
        gid: Group id to look for on either endpoint of an edge.

    Prints one ``"<date>: <src evdef> - <dst evdef>"`` line per matching
    edge of ``edge_set_diff`` and returns nothing.

    Note (from the original author): processing time is too long.
    """
    # The unused ``d_ltid`` accumulator was removed; nothing ever read it.
    am = arguments.ArgumentManager(conf1)
    am.load()
    for dt_range in am.iter_dt_range():
        cevmap, cgraph = edge_set_diff(conf1, conf2, dt_range)
        for edge in cgraph.edges():
            src_evdef = cevmap.evdef(edge[0])
            dst_evdef = cevmap.evdef(edge[1])
            if gid in (src_evdef.gid, dst_evdef.gid):
                timestr = dtutil.shortstr(dt_range[0])
                print("{0}: {1} - {2}".format(timestr, src_evdef, dst_evdef))
def edge_set_common(conf1, conf2, dt_range):
    """Build the graph of edges present under both configurations.

    Args:
        conf1: First configuration.
        conf2: Second configuration.
        dt_range: Time range used to select DAG arguments in both configs.

    Returns:
        Tuple ``(cevmap, cgraph)``: a fresh event definition map and an
        ``nx.Graph`` containing each undirected conf2 edge whose two event
        definitions are also connected in the conf1 graph.
    """
    am1 = arguments.ArgumentManager(conf1)
    am1.load()
    am2 = arguments.ArgumentManager(conf2)
    am2.load()

    # Pass 1: everything known from conf1.
    known_evmap = log2event.EventDefinitionMap()
    known_graph = nx.Graph()
    for args in am1.args_in_time(dt_range):
        dag1 = showdag.LogDAG(args)
        dag1.load()
        known_evmap = _add_nodes(known_evmap, dag1)
        known_graph = _add_edges(known_evmap, known_graph, dag1)

    cevmap = log2event.EventDefinitionMap()
    cgraph = nx.Graph()

    def _result_eid(evdef):
        # Reuse the id already registered in the result map, else register.
        if cevmap.has_evdef(evdef):
            return cevmap.get_eid(evdef)
        return cevmap.add_evdef(evdef)

    # Pass 2: keep the conf2 edges that conf1 also has.
    for args in am2.args_in_time(dt_range):
        dag2 = showdag.LogDAG(args)
        dag2.load()
        undirected = dag2.graph.to_undirected()
        for edge in undirected.edges():
            src_evdef, dst_evdef = dag2.edge_evdef(edge)
            if not known_evmap.has_evdef(src_evdef):
                continue
            if not known_evmap.has_evdef(dst_evdef):
                continue
            known_src = known_evmap.get_eid(src_evdef)
            known_dst = known_evmap.get_eid(dst_evdef)
            if not known_graph.has_edge(known_src, known_dst):
                continue
            new_src = _result_eid(src_evdef)
            new_dst = _result_eid(dst_evdef)
            cgraph.add_edge(new_src, new_dst)
    return cevmap, cgraph
def test_anonymize_restore(self):
    """Build DAGs from anonymized logs and view them via the original db.

    The amulog database is rebuilt and anonymized, events are generated
    with the anonymized source config, and for each DAG the source config
    is switched to the non-anonymized one before rendering the subgraphs.
    """
    from amulog import __main__ as amulog_main
    from amulog import manager
    targets = amulog_main.get_targets_conf(self._amulog_conf)
    manager.process_files_online(self._amulog_conf, targets, reset_db=True)

    from amulog import anonymize
    mapper = anonymize.AnonymizeMapper(self._amulog_conf)
    mapper.anonymize(self._amulog_conf_anonymize)
    mapper.dump()

    conf = config.open_config(arguments.DEFAULT_CONFIG, base_default=False)
    overrides = (
        ("general", "evdb", "sql"),
        ("database_sql", "database", "sqlite3"),
        ("database_amulog", "source_conf", self._path_amulogconf_anonymize),
        ("database_amulog", "use_anonymize_mapping", "true"),
        ("database_amulog", "given_amulog_database", "original"),
        ("database_sql", "sqlite3_filename", self._path_testdb),
        ("dag", "event_detail_cache", "false"),
        ("filter", "rules", ""),
    )
    for section, option, value in overrides:
        conf[section][option] = value

    from logdag import dtutil
    from logdag.source import evgen_log
    whole_term = self._whole_term
    unit = config.str2dur("1d")
    loader = evgen_log.LogEventLoader(conf)
    for dt_range in dtutil.iter_term(whole_term, unit):
        loader.read(dt_range, dump_org=False)

    arg_manager = arguments.ArgumentManager(conf)
    arg_manager.generate(arguments.all_args)

    from logdag import makedag
    from logdag import showdag
    amulog_section = conf["database_amulog"]
    edge_cnt = 0
    for args in arg_manager:
        # Build from the anonymized source ...
        amulog_section["source_conf"] = self._path_amulogconf_anonymize
        ldag = makedag.makedag_main(args, do_dump=False)
        # ... then render with the non-anonymized source config.
        amulog_section["source_conf"] = self._path_amulogconf
        showdag.show_subgraphs(ldag, "detail",
                               load_cache=False, graph=None)
        edge_cnt += ldag.number_of_edges()
    assert edge_cnt > 0
def separate_args(conf, tr):
    """Group the log messages of a trouble by the DAG arguments covering them.

    A trouble can span multiple days, so its messages may belong to several
    DAG argument sets.

    Args:
        conf: Configuration used for the argument manager and amulog db.
        tr: Trouble object; ``tr.data["message"]`` is iterated as line ids.

    Returns:
        A list of ``(args, messages)`` pairs, where ``messages`` are the log
        lines whose timestamp maps to that argument set.
    """
    from logdag import arguments
    manager = arguments.ArgumentManager(conf)
    manager.load()

    from amulog import log_db
    log_data = log_db.LogData(arguments.open_amulog_config(conf))

    lines_by_name = defaultdict(list)
    for lid in tr.data["message"]:
        line = log_data.get_line(lid)
        for args in manager.args_from_time(line.dt):
            lines_by_name[arguments.args2name(args)].append(line)

    separated = []
    for name, lines in lines_by_name.items():
        separated.append((arguments.name2args(name, conf), lines))
    return separated
def dag_anomaly_score(conf, feature="edge", score="tfidf"):
    """Compute one anomaly score per DAG of a configuration.

    Args:
        conf: Configuration used to load the argument manager.
        feature: Feature name forwarded to ``init_counter`` and
            ``edges_anomaly_score``.
        score: Scoring mode forwarded to ``edges_anomaly_score``.

    Returns:
        A dict mapping each job name to the sum of the values produced by
        ``edges_anomaly_score`` for the de-duplicated edges of that DAG.
        (Assumes ``edges_anomaly_score`` yields summable numbers.)
    """
    am = arguments.ArgumentManager(conf)
    am.load()
    counter = init_counter(conf, feature, am=am)

    d_score = {}
    for args in am:
        jobname = am.jobname(args)
        ldag = showdag.LogDAG(args)
        ldag.load()
        edges = showdag.remove_edge_duplication(ldag.graph.edges(), ldag)
        # Use a separate name for the total. Rebinding ``score`` here used
        # to pass the previous DAG's number as the scoring mode of the next.
        dag_score = sum(
            edges_anomaly_score(edges, ldag, feature=feature, score=score,
                                counter=counter, am=am))
        d_score[jobname] = dag_score
    return d_score
def show_dag_anomaly_score(ns):
    """Print the anomaly score of every DAG, one ``jobname score`` per line.

    Args:
        ns: Parsed command-line namespace. ``ns.feature`` and ``ns.score``
            are forwarded to ``dag_anomaly_score``. When ``ns.order`` is
            true the rows follow the argument manager's order; otherwise
            they are sorted by score, descending unless ``ns.reverse``.
    """
    conf = open_logdag_config(ns)

    from . import edge_search
    score_of = edge_search.dag_anomaly_score(conf, feature=ns.feature,
                                             score=ns.score)

    if not ns.order:
        rows = sorted(score_of.items(), key=lambda item: item[1],
                      reverse=(not ns.reverse))
    else:
        manager = arguments.ArgumentManager(conf)
        manager.load()
        rows = []
        for args in manager:
            rows.append((manager.jobname(args),
                         score_of[manager.jobname(args)]))

    for jobname, value in rows:
        print(jobname, value)
def show_graph_lor_edges(ns):
    """Print the number of union ("logical or") edges, total and per date.

    Args:
        ns: Parsed command-line namespace. ``ns.confs`` holds two config
            file names and ``ns.debug`` selects the log level.

    The total over all time ranges is printed first, followed by one
    ``"<start>: <edge count>"`` line per time range, sorted by start.
    """
    path1, path2 = ns.confs
    conf1 = arguments.open_logdag_config(path1)
    conf2 = arguments.open_logdag_config(path2)

    level = logging.DEBUG if ns.debug else logging.INFO
    amulog_logger = logging.getLogger("amulog")
    config.set_common_logging(conf1, logger=[_logger, amulog_logger],
                              lv=level)

    from . import comparison
    total_edges = 0
    edges_by_start = {}
    manager = arguments.ArgumentManager(conf1)
    manager.load()
    for dt_range in manager.iter_dt_range():
        _, union_graph = comparison.edge_set_lor(conf1, conf2, dt_range)
        total_edges += union_graph.number_of_edges()
        edges_by_start[dt_range[0]] = union_graph.edges()

    print("logical disjunction edge num: {0}".format(total_edges))
    for start in sorted(edges_by_start):
        print("{0}: {1}".format(start, len(edges_by_start[start])))
def show_graph_lor_edges(ns):
    """Print the number of union ("logical or") edges, total and per date.

    Args:
        ns: Parsed command-line namespace. ``ns.confs`` must hold exactly two
            config paths and ``ns.debug`` selects the log level.

    The total over all time ranges is printed first, followed by one
    ``"<start>: <edge count>"`` line per time range, sorted by start.
    """
    conf_paths = ns.confs
    assert len(conf_paths) == 2

    def _open(path):
        return config.open_config(
            path, ex_defaults=[arguments.DEFAULT_CONFIG])

    conf1, conf2 = [_open(path) for path in conf_paths]

    if ns.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    amulog_logger = logging.getLogger("amulog")
    config.set_common_logging(conf1, logger=[_logger, amulog_logger],
                              lv=level)

    from . import comp_conf
    total_edges = 0
    edges_by_start = {}
    manager = arguments.ArgumentManager(conf1)
    manager.load()
    for dt_range in manager.iter_dt_range():
        union = comp_conf.edge_set_lor(conf1, conf2, dt_range)
        union_graph = union[1]
        total_edges += union_graph.number_of_edges()
        edges_by_start[dt_range[0]] = union_graph.edges()

    print("logical disjunction edge num: {0}".format(total_edges))
    ordered = sorted(edges_by_start.items(), key=lambda item: item[0])
    for start, edge_view in ordered:
        print("{0}: {1}".format(start, len(edge_view)))
def show_diff_info(ns):
    """Print edge-count statistics comparing two configurations.

    Args:
        ns: Parsed command-line namespace. ``ns.confs`` must hold exactly two
            config paths and ``ns.debug`` selects the log level.

    For every time range of the first config, the edge counts of the common,
    union ("lor") and both one-sided difference graphs are accumulated; the
    four totals are printed at the end.
    """
    conf_paths = ns.confs
    assert len(conf_paths) == 2
    conf1, conf2 = [
        config.open_config(path, ex_defaults=[arguments.DEFAULT_CONFIG])
        for path in conf_paths
    ]

    level = logging.DEBUG if ns.debug else logging.INFO
    amulog_logger = logging.getLogger("amulog")
    config.set_common_logging(conf1, logger=[_logger, amulog_logger],
                              lv=level)

    from . import comp_conf
    counts = defaultdict(int)
    manager = arguments.ArgumentManager(conf1)
    manager.load()
    for dt_range in manager.iter_dt_range():
        _, common_graph = comp_conf.edge_set_common(conf1, conf2, dt_range)
        counts["common"] += common_graph.number_of_edges()

        lor_pair = comp_conf.edge_set_lor(conf1, conf2, dt_range)
        lor_evmap, lor_graph = lor_pair
        counts["lor"] += lor_graph.number_of_edges()

        # The same union is reused for both one-sided differences.
        _, only1_graph = comp_conf.edge_set_diff(
            conf1, conf2, dt_range, lor=(lor_evmap, lor_graph))
        counts["diff1"] += only1_graph.number_of_edges()

        _, only2_graph = comp_conf.edge_set_diff(
            conf2, conf1, dt_range, lor=(lor_evmap, lor_graph))
        counts["diff2"] += only2_graph.number_of_edges()

    print("Logical OR edges: {0}".format(counts["lor"]))
    print("Common edges: {0}".format(counts["common"]))
    print("Edges only found in {0}: {1}".format(ns.confs[0],
                                                 counts["diff1"]))
    print("Edges only found in {0}: {1}".format(ns.confs[1],
                                                 counts["diff2"]))
def show_sorted_edges(ldag, search_condition=None, feature="edge",
                      score="tfidf", reverse=False, view_context="edge",
                      load_cache=True, graph=None):
    """Render the edges of a DAG grouped under their anomaly score.

    Args:
        ldag: DAG whose edges are listed; ``ldag.conf`` selects the config.
        search_condition: Filter forwarded to ``showdag.check_conditions``.
        feature: Feature name forwarded to ``edges_anomaly_score``.
        score: Scoring mode forwarded to ``edges_anomaly_score``. With
            ``"count"`` the order is ascending unless ``reverse``; with any
            other mode it is descending unless ``reverse``.
        reverse: Flip the default ordering.
        view_context: ``context`` forwarded to ``showdag.edge_view``.
        load_cache: Forwarded to ``showdag.edge_view``.
        graph: Forwarded to ``showdag.edge_view``.

    Returns:
        Newline-joined text: a ``[score=..]`` header whenever the score
        changes (preceded by a blank line except for the first), followed by
        the view of each edge.
    """
    manager = arguments.ArgumentManager(ldag.conf)
    manager.load()

    candidates = []
    for edge in showdag.remove_edge_duplication(ldag.graph.edges(), ldag):
        if showdag.check_conditions(edge, ldag, search_condition):
            candidates.append(edge)

    scored = list(edges_anomaly_score(candidates, ldag, feature=feature,
                                      score=score, am=manager))
    if score == "count":
        descending = reverse
    else:
        descending = not reverse
    scored_sorted = sorted(scored, key=lambda pair: pair[1],
                           reverse=descending)

    lines = []
    last_value = None
    for edge, value in scored_sorted:
        if value != last_value:
            if last_value is not None:
                lines.append("")
            lines.append("[score={0}]".format(value))
            last_value = value
        lines.append(showdag.edge_view(edge, ldag, context=view_context,
                                       load_cache=load_cache, graph=graph))
    return "\n".join(lines)