def similar_block_log(conf, top_dt, end_dt, area, ignore_same = True):
    """Rank past PC results by event-vector distance to a given term.

    Builds an event-count vector for [top_dt, end_dt) in *area* (using a
    cached vector from DAGComparison when available, otherwise counting
    log lines directly), then computes the distance to every stored PC
    result in the same area.

    :param conf: configuration object
    :param top_dt: start of the query term
    :param end_dt: end of the query term
    :param area: target area name
    :param ignore_same: skip results whose term overlaps the query term
    :return: list of (PCOutput result, distance) tuples
    """
    #assert conf.get("search", "method") == "log"
    ld = log_db.LogData(conf)
    dagc = DAGComparison(conf, area)
    edict = dagc.data_for_cond(top_dt, end_dt, area)
    if edict is None:
        # no cached counts for this condition: count lines from the DB
        edict = {}
        for line in ld.iter_lines(top_dt = top_dt, end_dt = end_dt,
                area = area):
            weid = dagc.w_evmap.process_line(line)
            edict[weid] = edict.get(weid, 0) + 1
    # event ids of the query term define the vector dimensions
    l_weid = edict.keys()
    src_evv = _event_vector(l_weid, edict, dagc)
    print("{0} - {1} ({2}) : {3}".format(top_dt, end_dt, area, src_evv))
    src_dir = conf.get("dag", "output_dir")
    l_r = pcresult.results_in_area(conf, src_dir, area)
    result = []
    for r in l_r:
        if ignore_same and (r.end_dt > top_dt and r.top_dt < end_dt):
            # ignore if common term included
            pass
        else:
            edict_r = dagc.data_for_r(r)
            r_evv = _event_vector(l_weid, edict_r, dagc)
            print("{0} - {1} ({2}) : {3}".format(r.top_dt, r.end_dt,
                    r.area, r_evv))
            dist = _evv_distance(src_evv, r_evv)
            result.append((r, dist))
    return result
def test(conf, area):
    """Run the variable-classification task over the whole term and print
    the result, logging progress and elapsed time.

    :param conf: configuration object (reads "dag.whole_term")
    :param area: area name passed to log line iteration
    """
    start_dt = datetime.datetime.now()  # wall clock, for elapsed time
    _logger.info("var_cls task start")
    ld = log_db.LogData(conf)
    cv = CompareVariable(conf)
    w_term = conf.getterm("dag", "whole_term")
    if w_term is None:
        # no explicit term configured: use the term covered by the DB
        w_term = ld.whole_term()
    # process in 1-day windows, stepping 1 day at a time (no overlap)
    term = datetime.timedelta(days=1)
    diff = datetime.timedelta(days=1)
    for top_dt, end_dt in dtutil.iter_term(w_term, term, diff):
        _logger.info("loading log data ({0} - {1})".format(top_dt, end_dt))
        for line in ld.iter_lines(top_dt=top_dt, end_dt=end_dt, area=area):
            cv.add(line)
        _logger.info("log data loading done")
    _logger.info("{0} events found".format(len(cv.evmap)))
    cv.process()
    _logger.info("event relation estimating done")
    print cv.show_result()
    # NOTE(review): reuses the loop variable name end_dt for the finish
    # timestamp; harmless after the loop, but easy to misread
    end_dt = datetime.datetime.now()
    _logger.info("var_cls task done ({0})".format(end_dt - start_dt))
def count_edge_label_extype(conf): ll = init_ltlabel(conf) ld = log_db.LogData(conf) import pcresult s_keys = set() d_extype = defaultdict(int) src_dir = conf.get("dag", "output_dir") for fp in common.rep_dir(src_dir): r = pcresult.PCOutput(conf).load(fp) dedges, udedges = r._separate_edges() for edge in dedges + udedges: l_group = [r._label_group_ltg(r.evmap.info(eid).gid) for eid in edge] for group in l_group: s_keys.add(group) if l_group[0] == l_group[1]: d_extype[tuple(l_group)] += 1 else: d_extype[(l_group[0], l_group[1])] += 1 d_extype[(l_group[1], l_group[0])] += 1 table = [] table.append(["group"] + list(s_keys)) for key1 in s_keys: buf = [key1] for key2 in s_keys: cnt = d_extype[(key1, key2)] buf.append(cnt) table.append(buf) print common.cli_table(table)
def pc_all_args(conf):
    """Build the argument tuples for every PC-estimation unit.

    Walks the whole analysis term in windows of "dag.unit_term",
    stepping by "dag.unit_diff".  The pseudo-area "each" expands to one
    "host_<name>" area per host observed in the window.

    :return: list of (conf, top_dt, end_dt, dur, area) tuples
    """
    ld = log_db.LogData(conf)
    w_term = conf.getterm("dag", "whole_term")
    if w_term is None:
        w_top_dt, w_end_dt = ld.whole_term()
    else:
        w_top_dt, w_end_dt = w_term
    term = conf.getdur("dag", "unit_term")
    diff = conf.getdur("dag", "unit_diff")
    dur = conf.getdur("dag", "stat_bin")
    l_args = []
    window_top = w_top_dt
    while window_top < w_end_dt:
        window_end = window_top + term
        areas = conf.getlist("dag", "area")
        if "each" in areas:
            # replace the "each" placeholder with per-host areas
            areas.remove("each")
            areas.extend("host_" + host
                         for host in ld.whole_host(window_top, window_end))
        l_args.extend((conf, window_top, window_end, dur, area)
                      for area in areas)
        window_top = window_top + diff
    return l_args
def show_diff_event(conf1, conf2):
    """Print events that differ between two configurations, twice:
    first as raw event definitions, then in human-readable form with
    the log-template group of each event.

    :param conf1: baseline configuration (its DB is used for display)
    :param conf2: configuration to compare against
    """
    d_diff = diff_event(conf1, conf2)
    import log2event
    import log_db
    ld = log_db.LogData(conf1)
    import lt_label
    # NOTE(review): ll is initialized but never used in this function
    ll = lt_label.init_ltlabel(conf1)
    # leftover draft of a per-type counting feature, never finished:
    # cnt_rm_type = 0; cnt_rm_ev = 0
    # cnt_rp_type = 0; cnt_tp_ev = 0
    # type_rm = log2event.EventDefinitionMap.type_normal
    # type_rp = log2event.EventDefinitionMap.type_periodic_remainder
    # for evdef, cnt in d_diff.iteritems():
    #     if evdef =
    # first pass: raw event definitions, most frequent first
    for evdef, cnt in sorted(d_diff.iteritems(), key = lambda x: x[1],
            reverse = True):
        print cnt, evdef
    print
    # second pass: readable form with the event's template group
    for evdef, cnt in sorted(d_diff.iteritems(), key = lambda x: x[1],
            reverse = True):
        print cnt, log2event.EventDefinitionMap.get_str(evdef)
        print " " + ld.show_ltgroup(evdef.gid)
        print
def test_ltlabel(conf): def output(ld, ltgid, label, group): if label is None: label = str(label) return " ".join((group, label, ld.show_ltgroup(ltgid))) ld = log_db.LogData(conf) ll = init_ltlabel(conf) d_buf = {} buf_none = [] for ltgid in ld.iter_ltgid(): l_gid = ld.ltg_members(ltgid) if len(l_gid) == 1: #label = ll.get_lt_label(ltgid, ld.ltg_members(ltgid)) label = ll.get_ltg_label(ltgid, ld.ltg_members(ltgid)) group = ll.get_group(label) else: label = ll.get_ltg_label(ltgid, ld.ltg_members(ltgid)) group = ll.get_group(label) if label is None: buf_none.append(output(ld, ltgid, str(label), group)) else: d_buf.setdefault(label, []).append(output(ld, ltgid, label, group)) for k, buf in sorted(d_buf.iteritems()): print "\n".join(buf) print print "\n".join(buf_none)
def log2event(conf, top_dt, end_dt, dur, area):
    """Convert log lines in [top_dt, end_dt) into per-event sequences.

    :param conf: configuration object (reads "dag.use_filter")
    :param top_dt: start of the target term
    :param end_dt: end of the target term
    :param dur: bin length handed to nodestat.EventSequence
    :param area: "all", "host_<hostname>", or an area name
    :return: (edict, evmap); edict maps eid -> nodestat.EventSequence,
        evmap is the LogEventIDMap assigning eids to lines
    """
    ld = log_db.LogData(conf)
    ltf = ltfilter.IDFilter(conf.getlist("dag", "use_filter"))
    evmap = LogEventIDMap()
    edict = {}  # key : eid, val : nodestat.EventSequence
    # select the line iterator matching the requested area
    if area == "all":
        iterobj = ld.iter_lines(top_dt=top_dt, end_dt=end_dt)
    elif area.startswith("host_"):  # was area[:5] == "host_"; equivalent
        host = area[5:]
        iterobj = ld.iter_lines(top_dt=top_dt, end_dt=end_dt, host=host)
    else:
        iterobj = ld.iter_lines(top_dt=top_dt, end_dt=end_dt, area=area)
    for line in iterobj:
        if not ltf.isremoved(line.lt.ltid):
            ev = nodestat.Event(line.dt, 1)
            eid = evmap.eid(line)
            # NOTE(review): key/val look redundant with the
            # Event(line.dt, 1) constructor args -- confirm in nodestat
            ev.key = line.dt
            ev.val = 1
            # fix: dict.has_key() is deprecated (removed in Python 3);
            # "in" is equivalent and works on both versions
            if eid not in edict:
                edict[eid] = nodestat.EventSequence(eid,
                        top_dt, end_dt, dur, maxval=2, default=0)
            edict[eid].add_event(ev)
    return edict, evmap
def whole_term(conf, ld=None):
    """Return (top_dt, end_dt) for the whole analysis term.

    Uses the configured "dag.whole_term" when it is set; otherwise
    falls back to the term covered by the log DB, opening the DB on
    demand when *ld* is not supplied.

    :param conf: configuration object
    :param ld: optional pre-opened log_db.LogData instance
    :return: (top_dt, end_dt) tuple
    """
    configured = conf.getterm("dag", "whole_term")
    if configured is not None:
        return configured
    db = ld if ld is not None else log_db.LogData(conf)
    return db.whole_term()
def add_db(conf, l_event, verbose, reset_db): ld = log_db.LogData(conf, edit = True, reset_db = reset_db) ld.init_ltmanager() lp = logparser.LogParser(conf) ha = host_alias.HostAlias(conf) for eid, t in l_event: msg = message(eid, t) log_db.process_line(msg, ld, lp, ha) if verbose: print msg
def count_ltlabel(conf): ld = log_db.LogData(conf) ll = init_ltlabel(conf) default_label = conf.get("visual", "ltlabel_default_label") default_group = conf.get("visual", "ltlabel_default_group") d_lt_group = defaultdict(int) d_lt_label = defaultdict(int) d_line_group = defaultdict(int) d_line_label = defaultdict(int) for ltgid in ld.iter_ltgid(): l_gid = ld.ltg_members(ltgid) l_lt = ld.ltg_members(ltgid) label = ll.get_ltg_label(ltgid, l_lt) group = ll.get_group(label) if label is None: label = default_label group = default_group d_lt_group[group] += 1 d_lt_label[label] += 1 cnt_line = sum(lt.cnt for lt in l_lt) d_line_group[group] += cnt_line d_line_label[label] += cnt_line print("all templates : {0}".format(sum(d_lt_group.values()))) print("all lines : {0}".format(sum(d_line_group.values()))) print for group, l_label in ll.d_group.iteritems(): if d_lt_group.has_key(group): cnt_group = d_lt_group.pop(group) lines_group = d_line_group.pop(group) else: cnt_group = 0; lines_group = 0 #cnt_group = d_lt_group[group] #lines_group = d_line_group[group] print "group {0} : {1} templates, {2} lines".format(group, cnt_group, lines_group) for label in l_label: if d_lt_label.has_key(label): cnt_label = d_lt_label.pop(label) lines_label = d_line_label.pop(label) else: cnt_label = 0; lines_label = 0 #cnt_label = d_lt_label[label] #lines_label = d_line_label[label] print " label {0} : {1} templates, {2} lines".format(label, cnt_label, lines_label) print print d_line_group; print d_line_label
def update(self, conf):
    """Extend the stored changepoint data with newly arrived log data.

    Reads the log DB term, processes every complete bin after the last
    processed end (self._end_dt), feeds the discretized counts of each
    event to its changefinder object and dumps the refreshed state.

    :param conf: configuration object
    """
    ld = log_db.LogData(conf)
    db_top_dt, db_end_dt = ld.dt_term()
    # fix: the guard compared with "<", which aborted ("New data is too
    # small") exactly when at least one full new bin WAS available --
    # the opposite of its own warning message.  Abort only when less
    # than one complete new bin has arrived.
    if self._end_dt is not None and \
            self._end_dt + self._binsize > db_end_dt:
        _logger.warning("New data is too small or not found")
        return
    self._evmap = log2event.generate_evmap(conf, ld, None, None)
    if self._end_dt is None:
        # first run: start at the first bin boundary in the DB
        top_dt = dtutil.adj_sep(db_top_dt, self._binsize)
    else:
        top_dt = self._end_dt
    # The last bin will not be added, because it may be uncompleted
    end_dt = dtutil.adj_sep(db_end_dt, self._binsize)
    l_label = dtutil.label(top_dt, end_dt, self._binsize)
    _logger.info("updating changepoint data ({0} - {1})".format(
            top_dt, end_dt))
    for eid in self._evmap.iter_eid():
        evdef = self._evmap.info(eid)
        _logger.info("processing {0}".format(self._evmap.info_str(eid)))
        cf = self._load_cf(evdef)
        l_data, l_score = self._load_data(evdef)
        if cf is None:
            # no stored state for this event yet: start fresh
            cf = self._new_cf()
            l_data = []
            l_score = []
        l_dt = [line.dt for line in ld.iter_lines(
                **self._evmap.iterline_args(eid, top_dt, end_dt))]
        if len(l_dt) > 0:
            _logger.info("{0} messages in given term".format(len(l_dt)))
            l_val = dtutil.discretize(l_dt, l_label, binarize=False)
            for val in l_val:
                l_data.append(val)
                score = cf.update(val)
                l_score.append(score)
            self._dump_cf(evdef, cf)
            self._dump_data(evdef, l_data, l_score)
        else:
            _logger.info("no message found in processing term, passed")
    self._end_dt = end_dt
    self._dt_label += l_label
    if self._top_dt is None:
        self._top_dt = top_dt
    _logger.info("task completed")
def view(conf, ltid, ltgid, top_dt, end_dt, host, area, oflag): ld = log_db.LogData(conf) for e in ld.iter_lines(ltid=ltid, ltgid=ltgid, top_dt=top_dt, end_dt=end_dt, host=host, area=area): if oflag: print e.restore_line() else: print e
def get_edict(conf, top_dt, end_dt, dur, area):
    """Return (edict, evmap) for the given term/area, using a cached
    dump when one exists and generating (and dumping) it otherwise.

    :param conf: configuration object
    :param top_dt: start of the target term
    :param end_dt: end of the target term
    :param dur: bin length (part of the cache-file key)
    :param area: target area name
    :return: (edict, evmap)
    """
    filepath = edict_filepath(conf, top_dt, end_dt, dur, area)
    if os.path.exists(filepath):
        _logger.info("Preprocessed event data found ({0})".format(filepath))
        edict, evmap = load_edict(filepath)
    else:
        init_edict_dir(conf)
        ld = log_db.LogData(conf)
        edict, evmap = log2event(conf, ld, top_dt, end_dt, area)
        edict, evmap = filter_edict(conf, edict, evmap, ld,
                top_dt, end_dt, area)
        # fix: filepath was recomputed here with identical arguments;
        # reuse the value computed above
        dump_edict(filepath, edict, evmap)
    return edict, evmap
def _init_event_stat(self, conf):
    """Build the whole-term event map and cache per-result event counts.

    Populates self.w_evmap (an EventDefinitionMap spanning the whole
    analysis term) and self.d_ev, which maps each PC result's
    (top_dt, end_dt, area) key in self.area to a dict of
    {whole-term event id: number of matching log lines}.

    :param conf: configuration object
    """
    ld = log_db.LogData(conf)
    w_top_dt, w_end_dt = pc_log.whole_term(conf, ld)
    gid_name = conf.get("dag", "event_gid")
    self.w_evmap = log2event.EventDefinitionMap(w_top_dt, w_end_dt,
            gid_name)
    src_dir = conf.get("dag", "output_dir")
    l_r = pcresult.results_in_area(conf, src_dir, self.area)
    for r in l_r:
        edict = {}
        # count lines per whole-term event id within this result's term
        for line in ld.iter_lines(top_dt = r.top_dt, end_dt = r.end_dt,
                area = r.area):
            weid = self.w_evmap.process_line(line)
            edict[weid] = edict.get(weid, 0) + 1
        self.d_ev[(r.top_dt, r.end_dt, r.area)] = edict
def agg_log2event(conf, top_dt, end_dt, dur, area, fn):
    """Generate and dump filtered event data, appending summary counts
    to the aggregate file *fn*.

    One tab-separated line is appended per call:
    all-events, kept-events, periodic-remainder-events, dump path.
    """
    init_edict_dir(conf)
    ld = log_db.LogData(conf)
    raw_edict, raw_evmap = log2event(conf, ld, top_dt, end_dt, area)
    edict, evmap = filter_edict(conf, raw_edict, raw_evmap, ld,
            top_dt, end_dt, area)
    filepath = edict_filepath(conf, top_dt, end_dt, dur, area)
    dump_edict(filepath, edict, evmap)
    n_all = len(raw_edict)
    n_kept = len(edict)
    # events whose definition was replaced by a periodic remainder
    n_replaced = sum(1 for evdef in evmap.iter_evdef()
                     if evdef.type == evmap.type_periodic_remainder)
    with open(fn, "a") as f:
        f.write("{0}\t{1}\t{2}\t{3}\n".format(n_all, n_kept,
                n_replaced, filepath))
def event_label(conf):
    """Count events per label group across all dumped event-data files.

    :param conf: configuration object (reads "dag.event_dir")
    :return: defaultdict mapping group name -> number of events
    """
    import log_db
    ld = log_db.LogData(conf)
    import lt_label
    ll = lt_label.init_ltlabel(conf)
    d_group = defaultdict(int)
    dirname = conf.get("dag", "event_dir")
    for fp in common.rep_dir(dirname):
        edict, evmap = log2event.load_edict(fp)
        for eid in edict.keys():
            gid = evmap.info(eid).gid
            group = ll.get_ltg_group(gid, ld.ltg_members(gid))
            d_group[group] += 1
    return d_group
def list_all_gid(conf):
    """Print the label and group of every gid appearing in any edge of
    any PC result.

    :param conf: configuration object
    """
    import log_db
    import lt_label
    ld = log_db.LogData(conf)
    ll = lt_label.init_ltlabel(conf)
    s_gid = set()
    for r in pcresult.results(conf):
        for edge in r.graph.edges():
            for gid in [r.evmap.info(eid).gid for eid in edge]:
                s_gid.add(gid)
    for gid in s_gid:
        l_ltline = ld.ltg_members(gid)
        # fix: the group value was passed as a second argument to
        # print() instead of to format(), so the "{2}" placeholder
        # raised IndexError at runtime
        print("gid {0} : {1} in {2}".format(gid,
                ll.get_ltg_label(gid, l_ltline),
                ll.get_ltg_group(gid, l_ltline)))
def __init__(self, conf, fflag):
    """Initialize self-correlation filtering state from configuration.

    Reads the "filter_self_corr" section and restores any previously
    saved results from disk.

    :param conf: configuration object
    :param fflag: stored as-is on the instance (semantics defined by
        callers -- confirm against usage)
    """
    # log DB handle used for term lookup and line iteration
    self.ld = log_db.LogData(conf)
    self.filename = conf.get("filter_self_corr", "indata_filename")
    w_term = conf.getterm("filter_self_corr", "term")
    if w_term is None:
        # no explicit term configured: use the DB's full term
        self.top_dt, self.end_dt = self.ld.whole_term()
    else:
        self.top_dt, self.end_dt = w_term
    # candidate lag durations to test for self-correlation
    self.l_dur = [config.str2dur(str_dur)
            for str_dur in conf.getlist("filter_self_corr", "dur")]
    self.binsize = conf.getdur("filter_self_corr", "bin_size")
    self.th = conf.getfloat("filter_self_corr", "threshold")
    self.d_result = {}  # cached correlation results
    self.d_info = {}  # auxiliary per-key info
    self.fflag = fflag
    # restore previously saved state when a dump exists
    if self.loaded():
        self.load()
def lt2trainsource(conf, fn=None):
    """Write CRF training data derived from the stored log templates.

    For each template one example line is fetched from the DB; words
    at variable positions (template word == variable symbol) are
    labeled "V", fixed words "D".  Blocks are blank-line separated.

    :param conf: configuration object
    :param fn: output path; defaults to "log_template_crf.train_filename"
    """
    import log_db
    ld = log_db.LogData(conf)
    sym = conf.get("log_template", "variable_symbol")
    if fn is None:
        fn = conf.get("log_template_crf", "train_filename")
    blocks = []
    for lt in ld.iter_lt():
        tpl = lt.ltw
        # one concrete line as the source of variable word values
        example = ld.iter_lines(ltid=lt.ltid).next().l_w
        labeled = [(w_ex, "V") if w_tpl == sym else (w_tpl, "D")
                   for w_tpl, w_ex in zip(tpl, example)]
        blocks.append("\n".join(
            ["{0[0]} {0[1]} {0[1]}".format(pair) for pair in labeled]))
    with open(fn, "w") as f:
        f.write("\n\n".join(blocks))
def search_edge_label_extype(conf, label1, label2):
    """Print every PC-result edge whose endpoint groups match the pair.

    Matching is order-insensitive: (label1, label2) matches edges
    labeled in either direction.  Each result's filename is printed
    once, before its first matching edge.
    """
    ll = init_ltlabel(conf)
    ld = log_db.LogData(conf)
    import pcresult
    src_dir = conf.get("dag", "output_dir")
    for fp in common.rep_dir(src_dir):
        r = pcresult.PCOutput(conf).load(fp)
        header_done = False
        dedges, udedges = r._separate_edges()
        for edge in dedges + udedges:
            g0, g1 = [r._label_group_ltg(r.evmap.info(eid).gid)
                      for eid in edge]
            if (g0, g1) == (label1, label2) or (g1, g0) == (label1, label2):
                if not header_done:
                    print("# {0}".format(r.filename))
                    header_done = True
                r._print_edge(edge, False)
                r._print_edge_lt(edge)
def show_diff_event_label(conf1, conf2): d_diff = diff_event(conf1, conf2) import log_db ld = log_db.LogData(conf1) import lt_label ll = lt_label.init_ltlabel(conf1) d_group = defaultdict(int) for evdef in d_diff.keys(): gid = evdef.gid l_lt = ld.ltg_members(gid) group = ll.get_ltg_group(gid, l_lt) d_group[group] += 1 d_group_all = event_label(conf1) if len(d_group) == 0: print "return empty, is the config order right?" for group, cnt in d_group.items(): cnt_all = d_group_all[group] print group, cnt, "/", cnt_all
def test_log2event(conf):
    """Generate and print event data for every PC-estimation unit,
    sanity-checking that edict and evmap stay consistent.
    """
    import pc_log
    ld = log_db.LogData(conf)
    for args in pc_log.pc_all_args(conf):
        # args is (conf, top_dt, end_dt, dur, area); conf is reused
        _, top_dt, end_dt, dur, area = args
        _logger.info("testing log2event({0} - {1} in {2})".format(
                top_dt, end_dt, area))
        edict, evmap = get_edict(conf, top_dt, end_dt, dur, area)
        assert len(edict) == len(evmap)
        for eid in edict.keys():
            print("Event {0} : {1}".format(eid, evmap.info_str(eid)))
            if evmap.info(eid).type == EventDefinitionMap.type_normal:
                print(evmap.info_repr(ld, eid))
            else:
                # non-normal events: dump timestamps, then the original
                # definition commented out for reference
                print("\n".join([str(dt) for dt in edict[eid]]))
                print("\n".join(
                    ["#" + w for w in evmap.info_repr(ld, eid).split("\n")]))
            print
def test_ltlabel(conf): def output(ld, ltgid, label): return " ".join((label, ld.show_ltgroup(ltgid))) ld = log_db.LogData(conf) ltconf_path = conf.get("visual", "ltlabel") if ltconf_path == "": ltconf_path = DEFAULT_LABEL_CONF ll = LTLabel(ltconf_path) d_buf = {} buf_none = [] for ltgid in ld.iter_ltgid(): label = ll.get_ltg_label(ltgid, ld.ltg_members(ltgid)) if label is None: buf_none.append(output(ld, ltgid, str(label))) else: d_buf.setdefault(label, []).append(output(ld, ltgid, label)) for k, buf in sorted(d_buf.iteritems()): print "\n".join(buf) print print "\n".join(buf_none)
def count_event_label(conf):
    """Count log lines per label and per group over all dumped event
    data, then print a per-group / per-label breakdown.

    :param conf: configuration object (reads "dag.event_dir")
    """
    import log2event
    ld = log_db.LogData(conf)
    ll = init_ltlabel(conf)
    d_label = defaultdict(int)
    d_group = defaultdict(int)
    dirname = conf.get("dag", "event_dir")
    for fp in common.rep_dir(dirname):
        # fix: removed unused filename computation fp.split("/")[-1]
        edict, evmap = log2event.load_edict(fp)
        for eid, l_dt in edict.iteritems():
            gid = evmap.info(eid).gid
            l_lt = ld.ltg_members(gid)
            label = ll.get_ltg_label(gid, l_lt)
            group = ll.get_group(label)
            # weight by the number of timestamps (log lines) per event
            d_label[label] += len(l_dt)
            d_group[group] += len(l_dt)
    print("all lines : {0}".format(sum(d_group.values())))
    print
    for group, l_label in ll.d_group.iteritems():
        # fix: dict.has_key() is deprecated (removed in Python 3)
        if group in d_group:
            cnt_group = d_group.pop(group)
        else:
            cnt_group = 0
        print("group {0}: {1} lines".format(group, cnt_group))
        for label in l_label:
            if label in d_label:
                cnt_label = d_label.pop(label)
            else:
                cnt_label = 0
            print(" label {0}: {1} lines".format(label, cnt_label))
        print
def count_edge_label_detail(conf):
    """Tally per-group edge statistics over all PC results and print
    them as a table.

    Six counters are kept per group: all edges, directed edges,
    same-group ("intype") edges, their directed subset, edges that
    survive the EdgeFilter ("important"), and their directed subset.
    Each edge contributes once per endpoint group.
    """
    ll = init_ltlabel(conf)
    ld = log_db.LogData(conf)
    import pcresult
    d_group = defaultdict(int)
    d_group_directed = defaultdict(int)
    d_group_intype = defaultdict(int)
    d_group_intype_directed = defaultdict(int)
    d_group_mean = defaultdict(int)
    d_group_mean_directed = defaultdict(int)
    import edge_filter
    ef = edge_filter.EdgeFilter(conf)
    src_dir = conf.get("dag", "output_dir")
    for fp in common.rep_dir(src_dir):
        _logger.info("count_edge_label_detail processing {0}".format(fp))
        r = pcresult.PCOutput(conf).load(fp)
        dedges, udedges = r._separate_edges()
        # directed edges update both the plain and *_directed counters
        for edge in dedges:
            cedge = [r.evmap.info(eid) for eid in edge]
            fflag = ef.isfiltered(cedge)  # True -> filtered out (not "important")
            l_group = [r._label_group_ltg(r.evmap.info(eid).gid)
                    for eid in edge]
            iflag = (l_group[0] == l_group[1])  # same-group edge
            for group in l_group:
                d_group[group] += 1
                d_group_directed[group] += 1
                if iflag:
                    d_group_intype[group] += 1
                    d_group_intype_directed[group] += 1
                if not fflag:
                    d_group_mean[group] += 1
                    d_group_mean_directed[group] += 1
        # undirected edges update only the plain counters
        for edge in udedges:
            cedge = [r.evmap.info(eid) for eid in edge]
            fflag = ef.isfiltered(cedge)
            l_group = [r._label_group_ltg(r.evmap.info(eid).gid)
                    for eid in edge]
            iflag = (l_group[0] == l_group[1])
            for group in l_group:
                d_group[group] += 1
                if iflag:
                    d_group_intype[group] += 1
                if not fflag:
                    d_group_mean[group] += 1
    table = [["key", "all", "directed", "intype", "intype_directed",
            "important", "important_directed"]]
    for key in d_group.keys():
        temp = [key]
        temp.append(d_group[key])
        temp.append(d_group_directed[key])
        temp.append(d_group_intype[key])
        temp.append(d_group_intype_directed[key])
        temp.append(d_group_mean[key])
        temp.append(d_group_mean_directed[key])
        table.append(temp)
    table.append(["total", sum(d_group.values()),
            sum(d_group_directed.values()),
            sum(d_group_intype.values()),
            sum(d_group_intype_directed.values()),
            sum(d_group_mean.values()),
            sum(d_group_mean_directed.values())])
    print common.cli_table(table)
default=config.DEFAULT_CONFIG_NAME, help="configuration file") op.add_option("-l", "--limit", action="store", dest="show_limit", type="int", default=10, help="Limitation rows to show source log data") (options, args) = op.parse_args() if len(args) == 0: sys.exit(usage) mode = args[0] conf = config.open_config(options.conf) ld = log_db.LogData(conf) if mode == "show": show_all(ld) elif mode == "show-lt": show_lt(ld) elif mode == "show-group": if len(args) <= 1: show_ltg(ld, None) else: show_ltg(ld, int(args[1])) elif mode == "show-sort": show_sort(ld) elif mode == "breakdown": if len(args) <= 1: sys.exit("give me ltid, following \"{0}\"".format(mode)) ltid = int(args[1])
def _init_ld(self):
    """Lazily open the log DB the first time it is needed."""
    if self.ld is not None:
        return
    import log_db
    self.ld = log_db.LogData(self.conf)
""".format(sys.argv[0]).strip() op = optparse.OptionParser(usage) op.add_option("-c", "--config", action="store", dest="conf", type="string", default=config.DEFAULT_CONFIG_NAME, help="configuration file") op.add_option("-l", "--limit", action="store", dest="show_limit", type="int", default=5, help="Limitation rows to show source log data") (options, args) = op.parse_args() if len(args) == 0: sys.exit(usage) mode = args.pop(0) conf = config.open_config(options.conf) ld = log_db.LogData(conf, edit = True) if mode == "export": export(ld) elif mode == "show": show_all(ld) elif mode == "show-lt": show_lt(ld) elif mode == "show-group": if len(args) == 0: show_ltg(ld, None) else: show_ltg(ld, int(args[1])) elif mode == "show-sort": show_sort(ld) elif mode == "breakdown": if len(args) == 0: