def makeinput(conf, dt_range, area):
    """Load every configured event series and merge them into one DataFrame.

    Returns a tuple (input_df, evmap); both are None when no event data
    could be loaded for the given term/area.
    """
    evmap = EventDefinitionMap()
    evlist = []
    sources = config.getlist(conf, "dag", "source")
    for evdef, df in load_event_all(sources, conf, dt_range, area):
        eid = evmap.add_evdef(evdef)
        df.columns = [eid]
        evlist.append(df)
        msg = "loaded event {0} {1} (sum: {2})".format(
            eid, evmap.evdef(eid), df[eid].sum())
        _logger.debug(msg)

    if not evlist:
        _logger.warning("No data loaded")
        return None, None

    # optionally collapse synchronized events into merged ones
    if conf.getboolean("dag", "merge_syncevent"):
        merge_rules = config.getlist(conf, "dag", "merge_syncevent_rules")
        evlist, evmap = merge_sync_event(evlist, evmap, merge_rules)

    return pd.concat(evlist, axis=1), evmap
def init_pruner(conf):
    """Build the list of edge pruners declared in the [pc_prune] section.

    Raises NotImplementedError for an unknown method name.
    """
    from amulog import config

    l_pruner = []
    for method in config.getlist(conf, "pc_prune", "methods"):
        if method == "topology":
            fp = conf.get("pc_prune", "single_network_file")
            l_pruner.append(SingleLayerTopology(fp))
        elif method == "multi-topology":
            # both options are lists of "key:value" strings
            files = config.getlist(conf, "pc_prune", "multi_network_file")
            d_fp = dict(s.split(":") for s in files)
            rulestr = config.getlist(conf, "pc_prune", "multi_network_group")
            d_rule = dict(rule.split(":") for rule in rulestr)
            l_pruner.append(MultiLayerTopology(d_fp, d_rule))
        elif method == "independent":
            l_pruner.append(Independent())
        else:
            raise NotImplementedError("invalid method {0}".format(method))
    return l_pruner
def generate_json(iter_fp, conf):
    """Assemble the evdb definition (source / vsource / feature) as a dict.

    Scans *iter_fp* filenames, groups them per (host, module class,
    module id), and maps them onto the sources, virtual sources and
    features declared in *conf*.
    """
    ha = host_alias.HostAlias(conf["general"]["host_alias_filename"])
    sources = conf.options("source")

    # group input files by (resolved host, module class, module id);
    # unparsable filenames are skipped silently
    d_group = defaultdict(list)
    for fp in iter_fp:
        try:
            tmp_host, mod_cls, mod_id = parse_filename(fp, conf)
            host = ha.resolve_host(tmp_host)
            if host is not None:
                d_group[(host, mod_cls, mod_id)].append(fp)
        except ValueError:
            pass

    # attach each file group to every source whose class list contains it
    d_source = defaultdict(list)
    for (host, mod_cls, mod_id), l_fp in d_group.items():
        for source_name in sources:
            if mod_cls in config.getlist(conf, "source", source_name):
                d_source[source_name].append({"filelist": l_fp,
                                              "host": host,
                                              "mod_cls": mod_cls,
                                              "mod_id": mod_id})

    # vsource: name -> {src, func}
    d_vsource = {}
    for vsource_name in conf.options("vsource"):
        src, func = config.getlist(conf, "vsource", vsource_name)
        d_vsource[vsource_name] = {"src": src, "func": func}

    # feature: name, source, column, func_list (+ direction / group)
    d_feature = {}
    for feature_name in sorted(conf.options("feature")):
        tmp = config.getlist(conf, "feature", feature_name)
        keyfunc = tmp[1]
        d = {"name": feature_name,
             "source": tmp[0],
             "column": keyfunc,
             "func_list": tmp[2:]}
        # NOTE(review): substring match — a key containing e.g. "login"
        # would also match "in"; confirm the key naming makes this safe
        if "in" in keyfunc:
            d["direction"] = "in"
            d["group"] = "interface"
        elif "out" in keyfunc:
            d["direction"] = "out"
            d["group"] = "interface"
        else:
            d["group"] = "system"
        d_feature[feature_name] = d

    return {"source": d_source, "vsource": d_vsource, "feature": d_feature}
def __init__(self, conf, dry=False):
    """Set up the event source, filter rules and destination event DB.

    Only the "amulog" log source and the "influx" event DB backend are
    supported; anything else raises NotImplementedError.
    """
    self.conf = conf
    self.dry = dry

    src = conf["general"]["log_source"]
    if src != "amulog":
        raise NotImplementedError
    from . import source_amulog
    self.source = source_amulog.AmulogLoader(
        config.getterm(conf, "general", "evdb_whole_term"),
        conf["database_amulog"]["source_conf"],
        conf["database_amulog"]["event_gid"],
    )

    # every configured rule must be a known filter function
    self._filter_rules = config.getlist(conf, "filter", "rules")
    for method in self._filter_rules:
        assert method in filter_log.FUNCTIONS

    dst = conf["general"]["evdb"]
    if dst != "influx":
        raise NotImplementedError
    from . import influx
    dbname = conf["database_influx"]["log_dbname"]
    self.evdb = influx.init_influx(conf, dbname, df=False)

    self._lf = filter_log.init_logfilter(conf, self.source)
    self._feature_unit_diff = config.getdur(conf, "general",
                                            "evdb_unit_diff")
def test_db_sqlite3(self):
    """End-to-end test: generate test logs, load them into an sqlite3 DB,
    then check line and template-group counts.
    """
    # fix: the two paths were first assigned literal values that were
    # immediately overwritten from the config — dead code removed
    conf = config.open_config()
    path_testlog = conf['general']['src_path']
    path_db = conf['database']['sqlite3_filename']
    tlg = testlog.TestLogGenerator(testlog.DEFAULT_CONFIG, seed=3)
    tlg.dump_log(path_testlog)

    l_path = config.getlist(conf, "general", "src_path")
    if conf.getboolean("general", "src_recur"):
        targets = common.recur_dir(l_path)
    else:
        targets = common.rep_dir(l_path)
    log_db.process_files(conf, targets, True)

    ld = log_db.LogData(conf)
    num = ld.count_lines()
    self.assertEqual(num, 6539, "not all logs added to database")
    # count groups without materializing the full id list
    ltg_num = sum(1 for _ in ld.iter_ltgid())
    self.assertTrue(3 < ltg_num < 10,
                    ("log template generation fails? "
                     "(groups: {0})".format(ltg_num)))
    del ld

    common.rm(path_testlog)
    common.rm(path_db)
def __init__(self, conf, dry=False):
    """Set up the amulog event source, filter rules and the event DB.

    Only the "amulog" log source is supported; anything else raises
    NotImplementedError.
    """
    super().__init__(conf, dry=dry)

    src = conf["general"]["log_source"]
    if src == "amulog":
        from . import src_amulog
        args = [
            config.getterm(conf, "general", "evdb_whole_term"),
            conf["database_amulog"]["source_conf"],
            conf["database_amulog"]["event_gid"],
            conf.getboolean("database_amulog", "use_anonymize_mapping")
        ]
        self.source = src_amulog.AmulogLoader(*args)
    else:
        raise NotImplementedError

    # every configured rule must be a known filter function
    self._filter_rules = config.getlist(conf, "filter", "rules")
    for method in self._filter_rules:
        assert method in filter_log.FUNCTIONS

    # fix: removed the superseded commented-out influx initialization
    # block; _init_evdb now encapsulates backend selection
    self.evdb = self._init_evdb(conf, "log_dbname")

    # the log filter is only needed when filter rules are configured
    self._lf = None
    if len(self._filter_rules) > 0:
        self._lf = filter_log.init_logfilter(conf, self.source)

    self._feature_unit_diff = config.getdur(conf, "general",
                                            "evdb_unit_diff")
    self._given_amulog_database = \
        conf["database_amulog"]["given_amulog_database"]
def make_crf_train(conf, iterobj, return_ltidlist=False):
    """Sample training lines for the CRF template estimator.

    The sampling strategy comes from [log_template_crf] sample_method.
    Returns the sampled lines, plus their ltid list when
    *return_ltidlist* is true.
    """
    method = conf["log_template_crf"]["sample_method"]
    size = conf.getint("log_template_crf", "n_sample")

    if method == "all":
        l_train = list(iterobj)
    elif method == "random":
        l_train = train_sample_random(iterobj, size)
    elif method == "ltgen":
        # pre-cluster with a lightweight template generator, then sample
        lt_methods = config.getlist(conf, "log_template_crf",
                                    "sample_lt_methods")
        use_mp = conf.getboolean("log_template_crf",
                                 "sample_lt_multiprocess")
        table = lt_common.TemplateTable()
        ltgen = amulog.manager.init_ltgen_methods(conf, table, lt_methods,
                                                  multiprocess=use_mp)
        l_train = train_sample_ltgen(iterobj, size, ltgen)
    elif method == "leak":
        l_train = train_sample_leak(iterobj, size)
    else:
        raise NotImplementedError(
            "Invalid sampling method name {0}".format(method))

    if not return_ltidlist:
        return l_train
    train_ltidlist = [lm.lt.ltid for lm in l_train]
    return l_train, train_ltidlist
def all_args(conf):
    """Enumerate (conf, (top_dt, end_dt), area) argument tuples covering
    the whole configured term in sliding unit windows.
    """
    w_top_dt, w_end_dt = config.getterm(conf, "dag", "whole_term")
    term = config.getdur(conf, "dag", "unit_term")
    diff = config.getdur(conf, "dag", "unit_diff")
    # fix: hoisted out of the while loop — the area list does not depend
    # on the time window, so re-reading it every iteration was wasted work
    l_area = config.getlist(conf, "dag", "area")

    l_args = []
    top_dt = w_top_dt
    while top_dt < w_end_dt:
        end_dt = top_dt + term
        for area in l_area:
            l_args.append((conf, (top_dt, end_dt), area))
        top_dt = top_dt + diff
    return l_args
def init_prior_knowledge(conf, args, evmap):
    """Build a PriorKnowledge object from the configured methods.

    Returns None when no method is configured; raises
    NotImplementedError for an unknown method name.
    """
    from amulog import config

    methods = config.getlist(conf, "prior_knowledge", "methods")
    if not methods:
        return None

    pk = PriorKnowledge(evmap.eids())
    for method in methods:
        if method == "import":
            rule = conf.get("prior_knowledge", "import_apply_rule")
            allow_reverse = conf.getboolean("prior_knowledge",
                                            "import_allow_reverse")
            pk = ImportDAG(args, rule, allow_reverse).update(pk, evmap)
        elif method == "topology":
            fp = conf.get("prior_knowledge", "single_network_file")
            pk = Topology(fp).update(pk, evmap)
        elif method == "multi-topology":
            # both options are lists of "key:value" strings
            files = config.getlist(conf, "prior_knowledge",
                                   "multi_network_file")
            d_fp = dict(s.split(":") for s in files)
            rulestr = config.getlist(conf, "prior_knowledge",
                                     "multi_network_group")
            d_rule = dict(rule.split(":") for rule in rulestr)
            pk = LayeredTopology(d_fp, d_rule).update(pk, evmap)
        elif method == "independent":
            pk = HostIndependent().update(pk, evmap)
        elif method == "additional-source":
            pk = AdditionalSource().update(pk, evmap)
        else:
            raise NotImplementedError(
                "invalid method name {0}".format(method))
    return pk
def makeinput(conf, dt_range, area, binarize):
    """Load every configured event series and merge them into one DataFrame.

    Returns a tuple (input_df, evmap); both are None when no event data
    could be loaded for the given term/area.
    """
    evmap = EventDefinitionMap()
    evlist = []
    sources = config.getlist(conf, "dag", "source")
    for evdef, df in load_event_all(sources, conf, dt_range, area, binarize):
        eid = evmap.add_evdef(evdef)
        df.columns = [eid]
        evlist.append(df)
        msg = "loaded event {0} {1} (sum: {2})".format(
            eid, evmap.evdef(eid), df[eid].sum())
        _logger.debug(msg)

    # fix: without this guard pd.concat([]) raises ValueError; the other
    # makeinput variant in this project already handles the empty case
    if not evlist:
        _logger.warning("No data loaded")
        return None, None

    input_df = pd.concat(evlist, axis=1)
    return input_df, evmap
def init_ltgen_methods(conf, table, lt_methods=None, shuffle=None):
    """Build a log-template generator from the configured method list.

    With one method, returns the corresponding generator; with several,
    wraps them in an LTGenJoint (remembering the position of "import").
    Raises ValueError when no method is given.
    """
    if lt_methods is None:
        lt_methods = config.getlist(conf, "log_template", "lt_methods")
    if shuffle is None:
        shuffle = conf.getboolean("log_template_import", "shuffle")

    if len(lt_methods) > 1:
        l_ltgen = []
        import_index = None
        for index, method_name in enumerate(lt_methods):
            ltgen = init_ltgen(conf, table, method_name, shuffle)
            l_ltgen.append(ltgen)
            if method_name == "import":
                import_index = index
        return lt_common.LTGenJoint(table, l_ltgen, import_index)
    elif len(lt_methods) == 1:
        return init_ltgen(conf, table, lt_methods[0], shuffle)
    else:
        # fix: bare ValueError gave no hint what was wrong
        raise ValueError("no log template generation method given")
def load_event_snmp_all(conf, dt_range, area, d_el=None):
    """Yield (evdef, DataFrame) pairs for SNMP features matching *area*.

    *d_el* may supply a preconstructed loader under key "snmp";
    otherwise a new SNMPEventLoader is created. Features with no data
    (load_event returned None) are skipped.
    """
    if d_el is None:
        from .source import evgen_snmp
        el = evgen_snmp.SNMPEventLoader(conf)
    else:
        el = d_el["snmp"]
    areatest = AreaTest(conf)

    method = conf.get("dag", "ci_bin_method")
    bin_size = config.getdur(conf, "dag", "ci_bin_size")
    bin_diff = config.getdur(conf, "dag", "ci_bin_diff")

    # empty option means "use every available feature"
    feature_names = config.getlist(conf, "dag", "snmp_features")
    if not feature_names:
        feature_names = el.all_feature()

    for evdef in el.iter_evdef(feature_names):
        measure, tags = evdef.series()
        if not areatest.test(area, tags["host"]):
            continue
        df = load_event(measure, tags, dt_range,
                        bin_size, bin_diff, method, el)
        if df is not None:
            yield evdef, df
def all_args(conf):
    """Enumerate (conf, (top_dt, end_dt), area) argument tuples covering
    the whole configured term in sliding unit windows.

    The special area "each" expands into one "host_<name>" area per host
    seen within that window, so the list is re-read per window.
    """
    amulog_conf = config.open_config(conf["database_amulog"]["source_conf"])
    from amulog import log_db
    ld = log_db.LogData(amulog_conf)

    w_top_dt, w_end_dt = config.getterm(conf, "dag", "whole_term")
    term = config.getdur(conf, "dag", "unit_term")
    diff = config.getdur(conf, "dag", "unit_diff")

    l_args = []
    top_dt = w_top_dt
    while top_dt < w_end_dt:
        end_dt = top_dt + term
        l_area = config.getlist(conf, "dag", "area")
        if "each" in l_area:
            l_area.remove("each")
            l_area += ["host_" + host
                       for host in ld.whole_host(top_dt, end_dt)]
        for area in l_area:
            l_args.append((conf, (top_dt, end_dt), area))
        top_dt = top_dt + diff
    return l_args
def init_evloaders(conf):
    """Build one event loader per source configured in [dag] source."""
    d_evloader = {}
    for src in config.getlist(conf, "dag", "source"):
        d_evloader[src] = init_evloader(conf, src)
    return d_evloader