Example #1
def makeinput(conf, dt_range, area):
    evmap = EventDefinitionMap()
    evlist = []
    sources = config.getlist(conf, "dag", "source")
    for evdef, df in load_event_all(sources, conf, dt_range, area):
        eid = evmap.add_evdef(evdef)
        df.columns = [eid]
        evlist.append(df)
        msg = "loaded event {0} {1} (sum: {2})".format(eid, evmap.evdef(eid),
                                                       df[eid].sum())
        _logger.debug(msg)

    if len(evlist) == 0:
        _logger.warning("No data loaded")
        return None, None

    merge_sync = conf.getboolean("dag", "merge_syncevent")
    if merge_sync:
        merge_sync_rules = config.getlist(conf, "dag", "merge_syncevent_rules")
        evlist, evmap = merge_sync_event(evlist, evmap, merge_sync_rules)

    input_df = pd.concat(evlist, axis=1)
    return input_df, evmap
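
In makeinput above, each loaded event series arrives as a one-column DataFrame whose column is renamed to the event id, and the per-event frames are then joined column-wise. A minimal standalone sketch of that concatenation step, using assumed toy data in place of load_event_all:

import pandas as pd

# Toy stand-ins for the per-event DataFrames produced by load_event_all;
# each has a single column renamed to its event id (0, 1, ...).
idx = pd.date_range("2024-01-01", periods=3, freq="D")
df0 = pd.DataFrame({0: [1, 0, 2]}, index=idx)
df1 = pd.DataFrame({1: [0, 3, 0]}, index=idx)

# Column-wise concatenation yields one matrix with an event id per column,
# which is what makeinput returns as input_df.
input_df = pd.concat([df0, df1], axis=1)
print(input_df)
#             0  1
# 2024-01-01  1  0
# 2024-01-02  0  3
# 2024-01-03  2  0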
Example #2
def init_pruner(conf):
    from amulog import config
    l_pruner = []
    methods = config.getlist(conf, "pc_prune", "methods")
    for method in methods:
        if method == "topology":
            fp = conf.get("pc_prune", "single_network_file")
            l_pruner.append(SingleLayerTopology(fp))
        elif method == "multi-topology":
            d_fp = {}
            files = config.getlist(conf, "pc_prune", "multi_network_file")
            for group, fp in [s.split(":") for s in files]:
                d_fp[group] = fp
            rulestr = config.getlist(conf, "pc_prune", "multi_network_group")
            d_rule = {}
            for rule in rulestr:
                group, layer = rule.split(":")
                d_rule[group] = layer
            l_pruner.append(MultiLayerTopology(d_fp, d_rule))
        elif method == "independent":
            l_pruner.append(Independent())
        else:
            raise NotImplementedError("invalid method {0}".format(method))

    return l_pruner
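
The multi-topology branch reads two comma-separated options and splits each item on ":" to build group-to-file and group-to-layer mappings. A minimal sketch of that parsing with plain configparser, using a hypothetical getlist helper and assumed option values (the real amulog.config.getlist and the actual configuration keys may differ in detail):

import configparser

# Hypothetical helper standing in for amulog's config.getlist:
# split a comma-separated option value into a list of stripped items.
def getlist(conf, section, option):
    value = conf.get(section, option, fallback="")
    return [item.strip() for item in value.split(",") if item.strip()]

conf = configparser.ConfigParser()
# Assumed example values in "group:path" / "group:layer" form.
conf.read_string("""
[pc_prune]
methods = multi-topology
multi_network_file = l2:l2_topology.json, l3:l3_topology.json
multi_network_group = switch:l2, router:l3
""")

d_fp = dict(s.split(":") for s in getlist(conf, "pc_prune", "multi_network_file"))
d_rule = dict(s.split(":") for s in getlist(conf, "pc_prune", "multi_network_group"))
print(d_fp)    # {'l2': 'l2_topology.json', 'l3': 'l3_topology.json'}
print(d_rule)  # {'switch': 'l2', 'router': 'l3'}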
Example #3
def generate_json(iter_fp, conf):
    ha = host_alias.HostAlias(conf["general"]["host_alias_filename"])

    sources = conf.options("source")
    d_group = defaultdict(list)
    for fp in iter_fp:
        try:
            tmp_host, mod_cls, mod_id = parse_filename(fp, conf)
            host = ha.resolve_host(tmp_host)
            if host is not None:
                d_group[(host, mod_cls, mod_id)].append(fp)
        except ValueError:
            pass

    d_source = defaultdict(list)
    for (host, mod_cls, mod_id), l_fp in d_group.items():
        for source_name in sources:
            if mod_cls in config.getlist(conf, "source", source_name):
                d_source[source_name].append({
                    "filelist": l_fp,
                    "host": host,
                    "mod_cls": mod_cls,
                    "mod_id": mod_id
                })

    # generate vsource: dict with src, func
    d_vsource = {}
    for vsource_name in conf.options("vsource"):
        src, func = config.getlist(conf, "vsource", vsource_name)
        d = {"src": src, "func": func}
        d_vsource[vsource_name] = d

    # generate features: dict of feature
    # feature: name, source, column, func_list
    d_feature = {}
    for feature_name in sorted(conf.options("feature")):
        tmp = config.getlist(conf, "feature", feature_name)
        sourcename = tmp[0]
        keyfunc = tmp[1]
        l_postfunc = tmp[2:]
        d = {
            "name": feature_name,
            "source": sourcename,
            "column": keyfunc,
            "func_list": l_postfunc
        }
        if "in" in keyfunc:
            d["direction"] = "in"
            d["group"] = "interface"
        elif "out" in keyfunc:
            d["direction"] = "out"
            d["group"] = "interface"
        else:
            d["group"] = "system"
        d_feature[feature_name] = d

    js = {"source": d_source, "vsource": d_vsource, "feature": d_feature}
    return js
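
For orientation, the dictionary returned by generate_json has three top-level keys; the sketch below shows the shape with placeholder values (real hosts, module classes and feature names come from the configuration and the scanned file names):

# Illustrative shape only; every value here is a placeholder.
js = {
    "source": {
        "some_source": [
            {"filelist": ["/path/to/log1"], "host": "host1",
             "mod_cls": "cls1", "mod_id": "0"},
        ],
    },
    "vsource": {
        "some_vsource": {"src": "some_source", "func": "some_func"},
    },
    "feature": {
        "some_feature": {"name": "some_feature", "source": "some_source",
                         "column": "in_octets", "func_list": ["some_postfunc"],
                         "direction": "in", "group": "interface"},
    },
}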
Example #4
    def __init__(self, conf, dry=False):
        self.conf = conf
        self.dry = dry
        src = conf["general"]["log_source"]
        if src == "amulog":
            from . import source_amulog
            args = [config.getterm(conf, "general", "evdb_whole_term"),
                    conf["database_amulog"]["source_conf"],
                    conf["database_amulog"]["event_gid"]]
            self.source = source_amulog.AmulogLoader(*args)
        else:
            raise NotImplementedError
        self._filter_rules = config.getlist(conf, "filter", "rules")
        for method in self._filter_rules:
            assert method in filter_log.FUNCTIONS

        dst = conf["general"]["evdb"]
        if dst == "influx":
            dbname = conf["database_influx"]["log_dbname"]
            from . import influx
            self.evdb = influx.init_influx(conf, dbname, df=False)
            # self.evdb_df = influx.init_influx(conf, dbname, df = True)
        else:
            raise NotImplementedError

        self._lf = filter_log.init_logfilter(conf, self.source)
        self._feature_unit_diff = config.getdur(conf,
                                                "general", "evdb_unit_diff")
Example #5
    def test_db_sqlite3(self):
        path_testlog = "/tmp/amulog_testlog"
        path_db = "/tmp/amulog_db"

        conf = config.open_config()
        path_testlog = conf['general']['src_path']
        path_db = conf['database']['sqlite3_filename']

        tlg = testlog.TestLogGenerator(testlog.DEFAULT_CONFIG, seed=3)
        tlg.dump_log(path_testlog)

        l_path = config.getlist(conf, "general", "src_path")
        if conf.getboolean("general", "src_recur"):
            targets = common.recur_dir(l_path)
        else:
            targets = common.rep_dir(l_path)
        log_db.process_files(conf, targets, True)

        ld = log_db.LogData(conf)
        num = ld.count_lines()
        self.assertEqual(num, 6539, "not all logs added to database")
        ltg_num = len([gid for gid in ld.iter_ltgid()])
        self.assertTrue(ltg_num > 3 and ltg_num < 10,
                        ("log template generation fails? "
                         "(groups: {0})".format(ltg_num)))

        del ld
        common.rm(path_testlog)
        common.rm(path_db)
Example #6
    def __init__(self, conf, dry=False):
        super().__init__(conf, dry=dry)
        src = conf["general"]["log_source"]
        if src == "amulog":
            from . import src_amulog
            args = [
                config.getterm(conf, "general", "evdb_whole_term"),
                conf["database_amulog"]["source_conf"],
                conf["database_amulog"]["event_gid"],
                conf.getboolean("database_amulog",
                                "use_anonymize_mapping")
            ]
            self.source = src_amulog.AmulogLoader(*args)
        else:
            raise NotImplementedError
        self._filter_rules = config.getlist(conf, "filter", "rules")
        for method in self._filter_rules:
            assert method in filter_log.FUNCTIONS

        self.evdb = self._init_evdb(conf, "log_dbname")
#        dst = conf["general"]["evdb"]
#        if dst == "influx":
#            dbname = conf["database_influx"]["log_dbname"]
#            from . import influx
#            self.evdb = influx.init_influx(conf, dbname, df=False)
#            # self.evdb_df = influx.init_influx(conf, dbname, df = True)
#        else:
#            raise NotImplementedError

        self._lf = None
        if len(self._filter_rules) > 0:
            self._lf = filter_log.init_logfilter(conf, self.source)
        self._feature_unit_diff = config.getdur(conf,
                                                "general", "evdb_unit_diff")
        self._given_amulog_database = conf["database_amulog"]["given_amulog_database"]
Example #7
def make_crf_train(conf, iterobj, return_ltidlist=False):
    method = conf["log_template_crf"]["sample_method"]
    size = conf.getint("log_template_crf", "n_sample")
    if method == "all":
        l_train = list(iterobj)
    elif method == "random":
        l_train = train_sample_random(iterobj, size)
    elif method == "ltgen":
        lt_methods = config.getlist(conf, "log_template_crf",
                                    "sample_lt_methods")
        use_mp = conf.getboolean("log_template_crf", "sample_lt_multiprocess")
        table = lt_common.TemplateTable()
        ltgen = amulog.manager.init_ltgen_methods(conf, table, lt_methods,
                                                  multiprocess=use_mp)
        l_train = train_sample_ltgen(iterobj, size, ltgen)
    elif method == "leak":
        l_train = train_sample_leak(iterobj, size)
    else:
        raise NotImplementedError(
            "Invalid sampling method name {0}".format(method))

    if return_ltidlist:
        train_ltidlist = [lm.lt.ltid for lm in l_train]
        return l_train, train_ltidlist
    else:
        return l_train
Example #8
def all_args(conf):
    w_top_dt, w_end_dt = config.getterm(conf, "dag", "whole_term")
    term = config.getdur(conf, "dag", "unit_term")
    diff = config.getdur(conf, "dag", "unit_diff")

    l_args = []
    top_dt = w_top_dt
    while top_dt < w_end_dt:
        end_dt = top_dt + term
        l_area = config.getlist(conf, "dag", "area")
        for area in l_area:
            l_args.append((conf, (top_dt, end_dt), area))
        top_dt = top_dt + diff
    return l_args
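
all_args slides a window of length unit_term forward in steps of unit_diff over whole_term, so consecutive windows overlap whenever term is larger than diff. A minimal standalone sketch of that iteration with assumed datetime values in place of the configuration lookups:

from datetime import datetime, timedelta

# Assumed stand-ins for whole_term, unit_term and unit_diff.
w_top_dt, w_end_dt = datetime(2024, 1, 1), datetime(2024, 1, 5)
term, diff = timedelta(days=3), timedelta(days=1)

windows = []
top_dt = w_top_dt
while top_dt < w_end_dt:
    windows.append((top_dt, top_dt + term))
    top_dt += diff

for start, end in windows:
    print(start.date(), "->", end.date())
# 2024-01-01 -> 2024-01-04
# 2024-01-02 -> 2024-01-05
# 2024-01-03 -> 2024-01-06
# 2024-01-04 -> 2024-01-07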
Example #9
def init_prior_knowledge(conf, args, evmap):
    from amulog import config
    methods = config.getlist(conf, "prior_knowledge", "methods")
    if len(methods) == 0:
        return None

    node_ids = evmap.eids()
    pk = PriorKnowledge(node_ids)
    for method in methods:
        if method == "import":
            rule = conf.get("prior_knowledge", "import_apply_rule")
            allow_reverse = conf.getboolean("prior_knowledge",
                                            "import_allow_reverse")
            pk = ImportDAG(args, rule, allow_reverse).update(pk, evmap)
        elif method == "topology":
            fp = conf.get("prior_knowledge", "single_network_file")
            pk = Topology(fp).update(pk, evmap)
        elif method == "multi-topology":
            d_fp = {}
            files = config.getlist(conf, "prior_knowledge",
                                   "multi_network_file")
            for group, fp in [s.split(":") for s in files]:
                d_fp[group] = fp
            rulestr = config.getlist(conf, "prior_knowledge",
                                     "multi_network_group")
            d_rule = {}
            for rule in rulestr:
                group, layer = rule.split(":")
                d_rule[group] = layer
            pk = LayeredTopology(d_fp, d_rule).update(pk, evmap)
        elif method == "independent":
            pk = HostIndependent().update(pk, evmap)
        elif method == "additional-source":
            pk = AdditionalSource().update(pk, evmap)
        else:
            raise NotImplementedError("invalid method name {0}".format(method))
    return pk
Example #10
def makeinput(conf, dt_range, area, binarize):
    evmap = EventDefinitionMap()
    evlist = []
    sources = config.getlist(conf, "dag", "source")
    for evdef, df in load_event_all(sources, conf, dt_range, area, binarize):
        eid = evmap.add_evdef(evdef)
        df.columns = [eid]
        evlist.append(df)
        msg = "loaded event {0} {1} (sum: {2})".format(eid, evmap.evdef(eid),
                                                       df[eid].sum())
        _logger.debug(msg)
    input_df = pd.concat(evlist, axis=1)
    return input_df, evmap
Example #11
def init_ltgen_methods(conf, table, lt_methods=None, shuffle=None):
    if lt_methods is None:
        lt_methods = config.getlist(conf, "log_template", "lt_methods")
    if shuffle is None:
        shuffle = conf.getboolean("log_template_import", "shuffle")

    if len(lt_methods) > 1:
        l_ltgen = []
        import_index = None
        for index, method_name in enumerate(lt_methods):
            ltgen = init_ltgen(conf, table, method_name, shuffle)
            l_ltgen.append(ltgen)
            if method_name == "import":
                import_index = index
        return lt_common.LTGenJoint(table, l_ltgen, import_index)
    elif len(lt_methods) == 1:
        return init_ltgen(conf, table, lt_methods[0], shuffle)
    else:
        raise ValueError
Example #12
def load_event_snmp_all(conf, dt_range, area, d_el=None):
    if d_el is None:
        from .source import evgen_snmp
        el = evgen_snmp.SNMPEventLoader(conf)
    else:
        el = d_el["snmp"]
    areatest = AreaTest(conf)
    method = conf.get("dag", "ci_bin_method")
    ci_bin_size = config.getdur(conf, "dag", "ci_bin_size")
    ci_bin_diff = config.getdur(conf, "dag", "ci_bin_diff")

    l_feature_name = config.getlist(conf, "dag", "snmp_features")
    if len(l_feature_name) == 0:
        l_feature_name = el.all_feature()
    for evdef in el.iter_evdef(l_feature_name):
        measure, tags = evdef.series()
        if not areatest.test(area, tags["host"]):
            continue
        df = load_event(measure, tags, dt_range, ci_bin_size, ci_bin_diff,
                        method, el)
        if df is not None:
            yield evdef, df
Example #13
def all_args(conf):
    amulog_conf = config.open_config(conf["database_amulog"]["source_conf"])
    from amulog import log_db
    ld = log_db.LogData(amulog_conf)
    w_top_dt, w_end_dt = config.getterm(conf, "dag", "whole_term")
    term = config.getdur(conf, "dag", "unit_term")
    diff = config.getdur(conf, "dag", "unit_diff")

    l_args = []
    top_dt = w_top_dt
    while top_dt < w_end_dt:
        end_dt = top_dt + term
        l_area = config.getlist(conf, "dag", "area")
        if "each" in l_area:
            l_area.pop(l_area.index("each"))
            l_area += [
                "host_" + host for host in ld.whole_host(top_dt, end_dt)
            ]
        for area in l_area:
            l_args.append((conf, (top_dt, end_dt), area))
        top_dt = top_dt + diff
    return l_args
Example #14
def init_evloaders(conf):
    return {
        src: init_evloader(conf, src)
        for src in config.getlist(conf, "dag", "source")
    }