Esempio n. 1
0
    def _iter_feature_terms(self, feature_def, dt_range):
        """Yield paired (output, sensing) time ranges for one feature.

        Every yielded item is ``((dts, dte'), (sense_dts', sense_dte'))``:
        the first pair is the term produced by ``dtutil.iter_term`` with
        its end pulled back by one bin, the second pair is the range to
        read, widened on both sides by the convolution margin.

        Args:
            feature_def: feature definition mapping. "func_list" is
                required; "convolve_radius", "data_range" and
                "sense_range" are optional.
            dt_range: (start, end) pair of the whole range to split.
        """
        # Widen the sensing range when convolving so that bins close to
        # the term edges do not suffer from the convolve boundary problem.
        radius = 0
        if "convolve" in feature_def["func_list"]:
            if "convolve_radius" in feature_def:
                radius = feature_def["convolve_radius"]
            else:
                # compatibility for configparser-style rule
                radius = self.conf.getint("general", "evdb_convolve_radius")
        margin = self._feature_bin_size * radius

        # Original author's note: DatetimeIndex.get_loc includes the stop
        # time (unlike other types), so one bin is trimmed from the end.
        end_trim = self._feature_bin_size

        if "data_range" not in feature_def:
            # compatibility for configparser-style rule
            unit_term = config.getdur(self.conf, "general", "evdb_unit_term")
            unit_diff = config.getdur(self.conf, "general", "evdb_unit_diff")
        else:
            unit_term = config.str2dur(feature_def["data_range"])
            if "sense_range" in feature_def:
                unit_diff = config.str2dur(feature_def["sense_range"])
            else:
                unit_diff = unit_term

        for dts, dte in dtutil.iter_term(dt_range, unit_diff):
            trimmed_end = dte - end_trim
            # never look back beyond the start of the whole range
            sense_start = max(dt_range[0], dte - unit_term)
            output_range = (dts, trimmed_end)
            sense_range = (sense_start - margin, trimmed_end + margin)
            yield (output_range, sense_range)
Esempio n. 2
0
 def _recur(dt, host, event_name):
     """Possibly register one recurrence of the given event.

     Does nothing unless the enclosing ``section`` has a truthy
     "recurrence" option. Otherwise, with probability "recur_p", a new
     event is added at a time derived from ``dt`` through
     ``self._dt_delta_rand`` with the configured min/max durations.
     """
     conf = self.conf
     if not conf.has_option(section, "recurrence"):
         return
     enabled = conf.getboolean(section, "recurrence")
     if enabled and random.random() < conf.getfloat(section, "recur_p"):
         lower = config.getdur(conf, section, "recur_dur_min")
         upper = config.getdur(conf, section, "recur_dur_max")
         recur_dt = self._dt_delta_rand(dt, lower, upper)
         _add_event(recur_dt, host, event_name)
Esempio n. 3
0
def all_args(conf):
    """Enumerate job arguments for every (term, area) combination.

    Terms start at the beginning of "dag.whole_term", advance by
    "dag.unit_diff" and each span "dag.unit_term".

    Returns:
        list of ``(conf, (top_dt, end_dt), area)`` tuples.
    """
    whole_start, whole_end = config.getterm(conf, "dag", "whole_term")
    span = config.getdur(conf, "dag", "unit_term")
    step = config.getdur(conf, "dag", "unit_diff")

    jobs = []
    start = whole_start
    while start < whole_end:
        stop = start + span
        # the area list is re-read from the configuration for every term
        jobs.extend((conf, (start, stop), area)
                    for area in config.getlist(conf, "dag", "area"))
        start = start + step
    return jobs
Esempio n. 4
0
    def __init__(self, conf, dry=False):
        """Set up the log source, the filter rules and the event database.

        Args:
            conf: configuration object (configparser-like).
            dry: forwarded to the parent initializer.

        Raises:
            NotImplementedError: if "general.log_source" is not "amulog".
        """
        super().__init__(conf, dry=dry)

        # only the amulog backend is available as a log source
        if conf["general"]["log_source"] != "amulog":
            raise NotImplementedError
        from . import src_amulog
        self.source = src_amulog.AmulogLoader(
            config.getterm(conf, "general", "evdb_whole_term"),
            conf["database_amulog"]["source_conf"],
            conf["database_amulog"]["event_gid"],
            conf.getboolean("database_amulog", "use_anonymize_mapping"),
        )

        rules = config.getlist(conf, "filter", "rules")
        self._filter_rules = rules
        for rule_name in rules:
            assert rule_name in filter_log.FUNCTIONS

        # the event database is opened through self._init_evdb
        self.evdb = self._init_evdb(conf, "log_dbname")

        # a log filter is built only when at least one rule is configured
        self._lf = (filter_log.init_logfilter(conf, self.source)
                    if len(rules) > 0 else None)
        self._feature_unit_diff = config.getdur(
            conf, "general", "evdb_unit_diff")
        self._given_amulog_database = \
            conf["database_amulog"]["given_amulog_database"]
Esempio n. 5
0
def init_logfilter(conf, source):
    """Build a LogFilter from the options of the "filter" section.

    All raw options of the section are passed through as keyword
    arguments; the ones listed below are replaced by their parsed
    (int / float / duration) values.
    """
    section = "filter"

    def sample_rules(option):
        # each rule is a "_"-separated sequence of duration strings
        return [tuple(config.str2dur(part) for part in rule.split("_"))
                for rule in config.gettuple(conf, section, option)]

    kwargs = dict(conf[section])
    kwargs.update({
        "pre_count": conf.getint(section, "pre_count"),
        "pre_term": config.getdur(conf, section, "pre_term"),
        # fourier-based periodicity test
        "fourier_sample_rule": sample_rules("fourier_sample_rule"),
        "fourier_th_spec": conf.getfloat(section, "fourier_th_spec"),
        "fourier_th_eval": conf.getfloat(section, "fourier_th_eval"),
        "fourier_th_restore": conf.getfloat(section, "fourier_th_restore"),
        "fourier_peak_order": conf.getint(section, "fourier_peak_order"),
        # correlation-based periodicity test
        "corr_sample_rule": sample_rules("corr_sample_rule"),
        "corr_th": conf.getfloat(section, "corr_th"),
        "corr_diff": [config.str2dur(diffstr) for diffstr
                      in config.gettuple(conf, section, "corr_diff")],
        # linearity test
        "linear_sample_rule": sample_rules("linear_sample_rule"),
        "linear_count": conf.getint(section, "linear_count"),
        "linear_th": conf.getfloat(section, "linear_th"),
    })
    return LogFilter(source, **kwargs)
Esempio n. 6
0
    def __init__(self, conf, dry=False):
        """Set up the log source, the event database and the log filter.

        Args:
            conf: configuration object (configparser-like).
            dry: stored as ``self.dry``.

        Raises:
            NotImplementedError: if "general.log_source" is not "amulog"
                or "general.evdb" is not "influx".
        """
        self.conf = conf
        self.dry = dry

        # only the amulog backend is available as a log source
        if conf["general"]["log_source"] != "amulog":
            raise NotImplementedError
        from . import source_amulog
        self.source = source_amulog.AmulogLoader(
            config.getterm(conf, "general", "evdb_whole_term"),
            conf["database_amulog"]["source_conf"],
            conf["database_amulog"]["event_gid"])

        rules = config.getlist(conf, "filter", "rules")
        self._filter_rules = rules
        for rule_name in rules:
            assert rule_name in filter_log.FUNCTIONS

        # only influx is available as the event database
        if conf["general"]["evdb"] != "influx":
            raise NotImplementedError
        log_dbname = conf["database_influx"]["log_dbname"]
        from . import influx
        self.evdb = influx.init_influx(conf, log_dbname, df=False)

        self._lf = filter_log.init_logfilter(conf, self.source)
        self._feature_unit_diff = config.getdur(
            conf, "general", "evdb_unit_diff")
Esempio n. 7
0
 def __init__(self, conf):
     """Read the RRD settings from the "database_rrd" section."""
     section = "database_rrd"
     self._rows = conf.getint(section, "rows")
     self._cf = conf[section]["cf"]
     self._correct_roundup = conf.getboolean(section, "correct_roundup")
     # the bin size is kept as a whole number of seconds
     binsize = config.getdur(conf, section, "binsize")
     self._binsize = int(binsize.total_seconds())
Esempio n. 8
0
    def _generate_log(self, event):
        """Render the log messages triggered by one event and queue them.

        For every log name listed in the "logs" option of the
        "event_<name>" section, the "format" template of the "log_<name>"
        section is expanded and the result is appended to ``self.l_log``
        as ``(datetime, host, message)`` according to the section "mode".

        Args:
            event: indexable with [0] timestamp, [1] host, [2] event name
                and [3] a mapping of variable name -> replacement string.

        Raises:
            ValueError: if the template holds a variable that is neither a
                key of the event info nor one of "pid" / "host".
            IndexError: in "other_host_rand" mode when no host other than
                the event host exists in the configured groups
                (raised by ``random.choice`` on an empty list).
        """
        dt = event[0]
        host = event[1]
        event_name = event[2]
        info = event[3]
        for log_name in config.gettuple(self.conf, "event_" + event_name,
                                        "logs"):
            section = "log_" + log_name
            mode = self.conf.get(section, "mode")
            form = self.conf.get(section, "format")

            # Replace variables one at a time, re-scanning after each
            # substitution, until no variable is left in the message.
            mes = form
            while True:
                match = self._var_re.search(mes)
                if match is None:
                    break
                var_type = match.group().strip("$")
                if var_type in info:
                    var_string = info[var_type]
                elif var_type == "pid":
                    var_string = str(random.randint(1, 65535))
                elif var_type == "host":
                    var_string = host
                else:
                    raise ValueError(
                        "unknown variable {0!r} in format of section "
                        "{1!r}".format(var_type, section))
                mes = mes[:match.start()] + var_string + mes[match.end():]

            if mode == "each":
                self.l_log.append((dt, host, mes))
            elif mode == "delay_rand":
                # emit at a time shifted through self._dt_delta_rand
                delay_min = config.getdur(self.conf, section, "delay_min")
                delay_max = config.getdur(self.conf, section, "delay_max")
                log_dt = self._dt_delta_rand(dt, delay_min, delay_max)
                self.l_log.append((log_dt, host, mes))
            elif mode == "drop_rand":
                # keep the message only when the draw exceeds drop_p
                drop_p = self.conf.getfloat(section, "drop_p")
                if random.random() > drop_p:
                    self.l_log.append((dt, host, mes))
            elif mode == "other_host_rand":
                # emit from a random host of the listed groups, excluding
                # the host the event happened on
                l_host = [t_host
                          for t_group in config.gettuple(self.conf, section,
                                                         "groups")
                          for t_host in self.d_host[t_group]
                          if not t_host == host]
                self.l_log.append((dt, random.choice(l_host), mes))
            # any other mode produces no log line
Esempio n. 9
0
def load_event_log_all(conf, dt_range, area, binarize, d_el=None):
    """Yield ``(evdef, df)`` for every log event that has data.

    Args:
        conf: configuration object.
        dt_range: time range forwarded to the loader and ``load_event``.
        area: area forwarded to ``iter_evdef``.
        binarize: forwarded to ``load_event``.
        d_el: optional mapping of source class -> event loader; when
            omitted a new ``LogEventLoader`` is created.
    """
    if d_el is not None:
        loader = d_el[SRCCLS_LOG]
    else:
        from .source import evgen_log
        loader = evgen_log.LogEventLoader(conf)

    bin_method = conf.get("dag", "ci_bin_method")
    bin_size = config.getdur(conf, "dag", "ci_bin_size")
    bin_diff = config.getdur(conf, "dag", "ci_bin_diff")

    for evdef in loader.iter_evdef(dt_range, area):
        measure, tags = evdef.series()
        df = load_event(measure, tags, dt_range, bin_size, bin_diff,
                        bin_method, binarize, loader)
        # events for which load_event returns None are skipped
        if df is None:
            continue
        yield evdef, df
Esempio n. 10
0
def make_tsdb(ns):
    """Run the log-to-time-series conversion for every configured term.

    Uses ``tsdb.log2ts_pal`` when ``ns.parallel`` is greater than 1 and
    ``tsdb.log2ts`` otherwise; the whole run is measured with a Timer.
    """
    from . import tsdb
    conf = open_logdag_config(ns)
    l_args = arguments.all_terms(
        conf,
        config.getdur(conf, "database_ts", "unit_term"),
        config.getdur(conf, "database_ts", "unit_diff"))

    timer = common.Timer("mk-tsdb task", output=_logger)
    timer.start()
    n_proc = ns.parallel
    in_parallel = n_proc > 1
    for task_args in l_args:
        if in_parallel:
            tsdb.log2ts_pal(*task_args, pal=n_proc)
        else:
            tsdb.log2ts(*task_args)
    timer.stop()
Esempio n. 11
0
def load_merged_events(conf, dt_range, area, l_evdef, d_el):
    """Load the given events into one DataFrame (for visualization).

    Each event definition contributes one column, named by its position
    in ``l_evdef``.

    Raises:
        ValueError: if no time-series is available for an event.
    """
    areatest = AreaTest(conf)
    bin_method = conf.get("dag", "ci_bin_method")
    bin_size = config.getdur(conf, "dag", "ci_bin_size")
    bin_diff = config.getdur(conf, "dag", "ci_bin_diff")

    frames = []
    for position, evdef in enumerate(l_evdef):
        frame = _load_merged_event(conf, dt_range, area, evdef, areatest,
                                   bin_size, bin_diff, bin_method, d_el)
        if frame is None:
            raise ValueError("no time-series for {0}".format(evdef))
        frame.columns = [position]
        frames.append(frame)
    return pd.concat(frames, axis=1)
Esempio n. 12
0
def load_values(conf, gid, host, dt_range):
    """Load the timestamps of one (gid, host) pair and discretize them.

    The discretization method is selected by the "dag.ci_bin_method"
    option: "sequential", "slide" or "radius".

    Args:
        conf: configuration object.
        gid: group identifier passed to ``TimeSeriesDB.iter_ts``.
        host: host name passed to ``TimeSeriesDB.iter_ts``.
        dt_range: (start, end) pair of the range to load.

    Returns:
        The array returned by the selected ``dtutil.discretize_*`` call.

    Raises:
        NotImplementedError: if "dag.ci_bin_method" is none of the
            supported methods (previously this surfaced as an
            UnboundLocalError on the return statement).
    """
    method = conf.get("dag", "ci_bin_method")
    ci_bin_size = config.getdur(conf, "dag", "ci_bin_size")
    ci_bin_diff = config.getdur(conf, "dag", "ci_bin_diff")
    td = tsdb.TimeSeriesDB(conf)

    kwargs = {"dts": dt_range[0], "dte": dt_range[1], "gid": gid, "host": host}
    l_dt = list(td.iter_ts(**kwargs))

    if method == "sequential":
        return dtutil.discretize_sequential(l_dt, dt_range, ci_bin_size,
                                            False)
    if method == "slide":
        return dtutil.discretize_slide(l_dt, dt_range, ci_bin_diff,
                                       ci_bin_size, False)
    if method == "radius":
        # the radius is half of the bin size
        ci_bin_radius = 0.5 * ci_bin_size
        return dtutil.discretize_radius(l_dt, dt_range, ci_bin_diff,
                                        ci_bin_radius, False)
    raise NotImplementedError(
        "unsupported ci_bin_method: {0!r}".format(method))
Esempio n. 13
0
def load_event_log_all(conf, dt_range, area, d_el=None):
    """Yield ``(evdef, df)`` for every log event inside the given area.

    Args:
        conf: configuration object.
        dt_range: time range forwarded to the loader and ``load_event``.
        area: area name checked against each event's host.
        d_el: optional mapping of source class -> event loader; when
            omitted a new ``LogEventLoader`` is created.
    """
    if d_el is not None:
        loader = d_el[SRCCLS_LOG]
    else:
        from .source import evgen_log
        loader = evgen_log.LogEventLoader(conf)

    areatest = AreaTest(conf)
    bin_method = conf.get("dag", "ci_bin_method")
    bin_size = config.getdur(conf, "dag", "ci_bin_size")
    bin_diff = config.getdur(conf, "dag", "ci_bin_diff")

    for evdef in loader.iter_evdef(dt_range):
        measure, tags = evdef.series()
        # keep only events whose host passes the area test
        if areatest.test(area, tags["host"]):
            df = load_event(measure, tags, dt_range, bin_size, bin_diff,
                            bin_method, loader)
            if df is not None:
                yield evdef, df
Esempio n. 14
0
def load_event_snmp_all(conf, dt_range, area, d_el=None):
    """Yield ``(evdef, df)`` for every SNMP event inside the given area.

    The features are taken from "dag.snmp_features"; when that list is
    empty, all features known to the loader are used.

    Args:
        conf: configuration object.
        dt_range: time range forwarded to ``load_event``.
        area: area name checked against each event's host.
        d_el: optional mapping holding the loader under the key "snmp";
            when omitted a new ``SNMPEventLoader`` is created.
    """
    if d_el is not None:
        loader = d_el["snmp"]
    else:
        from .source import evgen_snmp
        loader = evgen_snmp.SNMPEventLoader(conf)

    areatest = AreaTest(conf)
    bin_method = conf.get("dag", "ci_bin_method")
    bin_size = config.getdur(conf, "dag", "ci_bin_size")
    bin_diff = config.getdur(conf, "dag", "ci_bin_diff")

    feature_names = config.getlist(conf, "dag", "snmp_features")
    if len(feature_names) == 0:
        feature_names = loader.all_feature()

    for evdef in loader.iter_evdef(feature_names):
        measure, tags = evdef.series()
        # keep only events whose host passes the area test
        if areatest.test(area, tags["host"]):
            df = load_event(measure, tags, dt_range, bin_size, bin_diff,
                            bin_method, loader)
            if df is not None:
                yield evdef, df
Esempio n. 15
0
def all_args(conf):
    """Enumerate job arguments for every (term, area) combination.

    Terms start at the beginning of "dag.whole_term", advance by
    "dag.unit_diff" and each span "dag.unit_term". The special area
    "each" is replaced by one "host_<name>" area per host reported by
    the amulog database for that term.

    Returns:
        list of ``(conf, (top_dt, end_dt), area)`` tuples.
    """
    amulog_conf = config.open_config(conf["database_amulog"]["source_conf"])
    from amulog import log_db
    ld = log_db.LogData(amulog_conf)
    whole_start, whole_end = config.getterm(conf, "dag", "whole_term")
    span = config.getdur(conf, "dag", "unit_term")
    step = config.getdur(conf, "dag", "unit_diff")

    jobs = []
    start = whole_start
    while start < whole_end:
        stop = start + span
        areas = config.getlist(conf, "dag", "area")
        if "each" in areas:
            # drop the first "each" marker and append per-host areas
            areas.remove("each")
            areas.extend("host_" + host
                         for host in ld.whole_host(start, stop))
        jobs.extend((conf, (start, stop), area) for area in areas)
        start = start + step
    return jobs
Esempio n. 16
0
def make_evdb_log_all(ns):
    """Read log events term by term over the whole configured range.

    The range "general.evdb_whole_term" is split by
    "general.evdb_unit_diff"; each term is passed to
    ``LogEventLoader.read`` and a timer lap is recorded for it.
    """
    conf = open_logdag_config(ns)
    dump_org = ns.org
    dry = ns.dry

    timer = common.Timer("make-evdb-log task", output=_logger)
    timer.start()

    from . import evgen_log
    whole_term = config.getterm(conf, "general", "evdb_whole_term")
    step = config.getdur(conf, "general", "evdb_unit_diff")
    loader = evgen_log.LogEventLoader(conf, dry=dry)
    for term in dtutil.iter_term(whole_term, step):
        loader.read(term, dump_org=dump_org)
        timer.lap_diff("{0}".format(term))

    timer.stop()
Esempio n. 17
0
def draw_graph_diff(ns):
    """Draw the edge-set difference of the DAGs of two configurations.

    The term starts at ``ns.timestr`` and spans "dag.unit_term" of the
    first configuration. The graph is written to ``ns.filename`` and that
    name is printed.
    """
    conf_fn1, conf_fn2 = ns.confs
    conf1 = arguments.open_logdag_config(conf_fn1)
    conf2 = arguments.open_logdag_config(conf_fn2)

    if ns.debug:
        lv = logging.DEBUG
    else:
        lv = logging.INFO
    loggers = [_logger, logging.getLogger("amulog")]
    config.set_common_logging(conf1, logger=loggers, lv=lv)

    dts = dtutil.shortstr2dt(ns.timestr)
    dt_range = (dts, dts + config.getdur(conf1, "dag", "unit_term"))
    output = ns.filename

    from . import comparison
    cevmap, cgraph = comparison.edge_set_diff(conf1, conf2, dt_range)

    from . import draw
    draw.graph_nx(output, draw.relabel_nodes(cgraph, cevmap))
    print(output)
Esempio n. 18
0
def filter_periodic(conf, ld, l_dt, dt_range, evdef, method):
    """Test whether the time-series is periodic.

    Every entry of "filter.sample_rule" ("<length>_<binsize>") is tried
    in order; the first positive test decides the result.

    Args:
        conf: configuration object.
        ld: log database handle, forwarded to ``reload_ts``.
        l_dt: timestamps of the time-series within ``dt_range``.
        dt_range: (start, end) pair of the range under test.
        evdef: event definition, forwarded to ``reload_ts``.
        method: "remove", "replace" or "corr".

    Returns:
        ``(flag, remain_dt, interval)``; ``(False, None, None)`` when no
        periodicity is found. ``remain_dt`` is set only by "replace".

    Raises:
        NotImplementedError: for any other ``method``.
    """
    not_periodic = (False, None, None)
    gid_name = conf.get("dag", "event_gid")
    min_count = conf.getint("filter", "pre_count")
    min_span = config.getdur(conf, "filter", "pre_term")

    # preliminary test
    if len(l_dt) < min_count:
        _logger.debug("time-series count too small, skip")
        return not_periodic
    if max(l_dt) - min(l_dt) < min_span:
        _logger.debug("time-series range too small, skip")
        return not_periodic

    # periodicity test
    for rule in config.gettuple(conf, "filter", "sample_rule"):
        dt_length, binsize = tuple(config.str2dur(part)
                                   for part in rule.split("_"))
        # reuse the given timestamps when the rule length equals the
        # length of dt_range, otherwise reload them
        if (dt_range[1] - dt_range[0]) == dt_length:
            sample = l_dt
        else:
            sample = reload_ts(ld, evdef, dt_length, dt_range, gid_name)
        a_cnt = dtutil.discretize_sequential(sample, dt_range, binsize,
                                             binarize=False)

        remain_dt = None
        if method == "replace":
            flag, remain_array, interval = period.fourier_replace(
                conf, a_cnt, binsize)
            if remain_array is not None:
                remain_dt = revert_event(remain_array, dt_range, binsize)
        elif method == "remove":
            flag, interval = period.fourier_remove(conf, a_cnt, binsize)
        elif method == "corr":
            flag, interval = period.periodic_corr(conf, a_cnt, binsize)
        else:
            raise NotImplementedError

        if flag:
            return flag, remain_dt, interval
    return not_periodic
Esempio n. 19
0
def draw_graph_diff(ns):
    """Draw the edge-set difference of the DAGs of two configurations.

    ``ns.confs`` must hold exactly two configuration paths. The term
    starts at ``ns.timestr`` and spans "dag.unit_term" of the first
    configuration. The graph is written to ``ns.filename`` and that name
    is printed.
    """
    l_conffp = ns.confs
    assert len(l_conffp) == 2

    def _open(conf_path):
        return config.open_config(
            conf_path, ex_defaults=[arguments.DEFAULT_CONFIG])

    conf1, conf2 = [_open(conf_path) for conf_path in l_conffp]

    if ns.debug:
        lv = logging.DEBUG
    else:
        lv = logging.INFO
    loggers = [_logger, logging.getLogger("amulog")]
    config.set_common_logging(conf1, logger=loggers, lv=lv)

    dts = dtutil.shortstr2dt(ns.timestr)
    dt_range = (dts, dts + config.getdur(conf1, "dag", "unit_term"))
    output = ns.filename

    from . import comp_conf
    cevmap, cgraph = comp_conf.edge_set_diff(conf1, conf2, dt_range)

    from . import draw
    draw.graph_nx(output, draw.relabel_graph(conf1, cgraph, cevmap))
    print(output)
Esempio n. 20
0
def filter_linear(conf, l_dt, dt_range):
    """Return True if the events accumulate linearly over dt_range.

    The cumulative event count per bin is compared with a straight line
    from 0 up to the number of events; the squared error, normalized by
    (number of bins * number of events), must be below
    "filter.linear_threshold".

    Returns False right away when there are fewer events than
    "filter.linear_count".
    """
    binsize = config.getdur(conf, "filter", "linear_binsize")
    threshold = conf.getfloat("filter", "linear_threshold")
    th_count = conf.getint("filter", "linear_count")

    n_events = len(l_dt)
    if n_events < th_count:
        return False

    # generate time-series cumulative sum
    origin = dt_range[0]
    span_sec = (dt_range[1] - origin).total_seconds()
    bin_sec = binsize.total_seconds()
    bins = math.ceil(1.0 * span_sec / bin_sec)
    a_stat = np.array([0] * int(bins))
    for dt in l_dt:
        pos = int((dt - origin).total_seconds() / bin_sec)
        assert pos < len(a_stat)
        # an event counts for its own bin and every later one
        a_stat[pos:] += 1

    a_linear = np.linspace(0, n_events, bins, endpoint=False)
    squared_error = (a_stat - a_linear) ** 2
    val = sum(squared_error) / (bins * n_events)
    return val < threshold
Esempio n. 21
0
 def jobname2args(name, conf):
     """Convert a job name "<area>_<timestr>" into job arguments.

     Returns:
         ``(conf, (dts, dte), area)`` where ``dte`` is ``dts`` plus
         "dag.unit_term".
     """
     # split on the first "_" only: the time string may contain more
     area, dtstr = name.split("_", 1)
     start = dtutil.shortstr2dt(dtstr)
     stop = start + config.getdur(conf, "dag", "unit_term")
     return conf, (start, stop), area
Esempio n. 22
0
    def __init__(self, conf, parallel=None, dry=False):
        """Set up the SNMP source, event database and feature definitions.

        Args:
            conf: configuration object (configparser-like).
            parallel: stored as ``self.parallel``.
            dry: forwarded to the parent initializer.

        Raises:
            NotImplementedError: if "general.snmp_source" is neither
                "rrd" nor "influx".
            ValueError: if the "feature" entry of the feature definition
                file is neither a list nor a dict (previously this
                surfaced later as an AttributeError).
        """
        super().__init__(conf, dry=dry)
        self.parallel = parallel
        self._srcdb = conf["general"]["snmp_source"]
        if self._srcdb == "rrd":
            from . import src_rrd
            self.source = src_rrd.RRDLoader(conf)
        elif self._srcdb == "influx":
            source_dbname = conf["database_influx"]["snmp_source_dbname"]
            from . import influx
            self.source = influx.init_influx(conf, source_dbname, df=False)
        else:
            raise NotImplementedError

        # the event database is opened through self._init_evdb
        self.evdb = self._init_evdb(conf, "snmp_dbname")

        self._ha = host_alias.HostAlias(conf["general"]["host_alias_filename"])

        snmp_def = conf["general"]["snmp_feature_def"]
        with open(snmp_def, "r") as f:
            jsobj = json.load(f)

        # self._d_source: list of dict: seriesdef
        # seriesdef keys: filelist, host, mod_cls, mod_id
        self._d_source = jsobj["source"]

        self._d_vsourcedef = jsobj["vsource"]
        self._init_vsource()

        feature_defs = jsobj["feature"]
        if isinstance(feature_defs, list):
            self._d_feature = feature_defs
        elif isinstance(feature_defs, dict):
            # for backward compatibility with configparser-style rule:
            # the mapping key becomes the "name" of each definition
            self._d_feature = []
            for name, fdef in feature_defs.items():
                fdef["name"] = name
                self._d_feature.append(fdef)
        else:
            raise ValueError(
                "\"feature\" in {0} must be a list or a dict, not {1}".format(
                    snmp_def, type(feature_defs).__name__))

        # reverse index: source name -> feature definitions reading it
        self._d_rfeature = defaultdict(list)
        for fdef in self._d_feature:
            self._d_rfeature[fdef["source"]].append(fdef)

        self._feature_unit_term = config.getdur(conf, "general",
                                                "evdb_unit_term")
        self._feature_unit_diff = config.getdur(conf, "general",
                                                "evdb_unit_diff")
        self._feature_bin_size = config.getdur(conf, "general", "evdb_binsize")
        self._feature_convolve_radius = conf.getint("general",
                                                    "evdb_convolve_radius")
        self._mproc = None
Esempio n. 23
0
def edge_temporal_sort(ldag,
                       time_condition,
                       search_condition=None,
                       reverse=False,
                       view_context="edge",
                       load_cache=True,
                       graph=None):
    """Return a text listing of edges sorted by temporal distance.

    Each edge gets a score: the mean, over its two end nodes, of the
    node's count-weighted average distance (in seconds) between its
    time-series bins and the requested time or time range.

    Args:
        ldag: DAG object providing ``conf``, ``graph`` and ``node_ts``.
        time_condition: dict with either "time" (a single timestamp) or
            "time_range" (a (start, end) pair). "time" takes precedence
            when both are present.
        search_condition: forwarded to ``showdag.check_conditions``.
        reverse: sort in descending score order when True.
        view_context: forwarded to ``showdag.edge_view`` as ``context``.
        load_cache: forwarded to ``showdag.edge_view``.
        graph: graph to collect nodes from; defaults to ``ldag.graph``.

    Returns:
        str: edge views joined by newlines, grouped under
        "[average_diff_sec=...]" header lines with one blank line
        between groups.
    """
    assert "time" in time_condition or "time_range" in time_condition
    if graph is None:
        graph = ldag.graph

    from amulog import config
    ci_bin_size = config.getdur(ldag.conf, "dag", "ci_bin_size")

    nodes = set()  # nodes with any adjacent edges
    for edge in graph.edges():
        nodes.add(edge[0])
        nodes.add(edge[1])
    # presumably one column per node, indexed by bin start time -- TODO confirm
    df_ts = ldag.node_ts(list(nodes))

    if "time" in time_condition:
        dt = time_condition["time"]
        # distance (seconds) from the center of each bin to the given time
        sr_diff_td = (df_ts.index.to_series() + (0.5 * ci_bin_size) - dt).abs()
        sr_diff = sr_diff_td.map(lambda x: x.total_seconds())
        # per column: distances weighted by the column's share of counts
        # NOTE(review): a column summing to zero divides by zero here --
        # confirm node_ts never returns an all-zero series
        df_score = df_ts.apply(lambda x: x * sr_diff / sum(x))
    else:
        dts, dte = time_condition["time_range"]
        # distance (seconds) from the center of each bin to the range;
        # zero for bins whose center lies inside the range
        diff = []
        for tmp_ts in df_ts.index:
            ts = tmp_ts + 0.5 * ci_bin_size
            if ts < dts:
                diff.append((dts - ts).total_seconds())
            elif ts > dte:
                diff.append((ts - dte).total_seconds())
            else:  # dts <= ts <= dte
                diff.append(float(0))
        sr_diff = pd.Series(diff, index=df_ts.index)
        df_score = df_ts.apply(lambda x: x * sr_diff / sum(x))

    items = []
    # NOTE(review): edges are taken from ldag.graph, not from the `graph`
    # argument used above to collect nodes; if `graph` lacks a node of
    # ldag.graph, the df_score lookup below would fail -- confirm intent
    edges = [
        edge
        for edge in showdag.remove_edge_duplication(ldag.graph.edges(), ldag)
        if showdag.check_conditions(edge, ldag, search_condition)
    ]
    for edge in edges:
        # edge score: mean of the summed scores of both end nodes
        score = (sum(df_score[edge[0]]) + sum(df_score[edge[1]])) / 2
        items.append((edge, score))

    l_buf = []
    prev = None  # score of the group currently being printed
    for edge, score in sorted(items, key=lambda x: x[1], reverse=reverse):
        # NOTE(review): edges were already filtered with the same
        # conditions above, so this check looks redundant -- confirm
        if showdag.check_conditions(edge, ldag, search_condition):
            if score != prev:
                # start a new group; separate it from the previous one
                if prev is not None:
                    l_buf.append("")
                l_buf.append("[average_diff_sec={0}]".format(score))
                prev = score
            msg = showdag.edge_view(edge,
                                    ldag,
                                    context=view_context,
                                    load_cache=load_cache,
                                    graph=graph)
            l_buf.append(msg)
    return "\n".join(l_buf)
Esempio n. 24
0
def _iter_evdb_term(conf):
    """Split "general.evdb_whole_term" by "general.evdb_unit_diff".

    Returns whatever ``dtutil.iter_term`` returns for that range and step.
    """
    return dtutil.iter_term(
        config.getterm(conf, "general", "evdb_whole_term"),
        config.getdur(conf, "general", "evdb_unit_diff"))