def init_logfilter(conf, source):
    """Build a LogFilter from the [filter] section of conf.

    Duration options are parsed with config.getdur / config.str2dur, and
    the *_sample_rule options are tuples of "<term>_<binsize>" strings.
    """
    # start from the raw option strings, then overwrite the typed values
    kwargs = dict(conf["filter"])
    kwargs["pre_count"] = conf.getint("filter", "pre_count")
    kwargs["pre_term"] = config.getdur(conf, "filter", "pre_term")
    kwargs["fourier_sample_rule"] = [
        tuple(config.str2dur(s) for s in dt_cond.split("_"))
        for dt_cond in config.gettuple(conf, "filter", "fourier_sample_rule")]
    kwargs["fourier_th_spec"] = conf.getfloat("filter", "fourier_th_spec")
    kwargs["fourier_th_eval"] = conf.getfloat("filter", "fourier_th_eval")
    kwargs["fourier_th_restore"] = conf.getfloat("filter", "fourier_th_restore")
    kwargs["fourier_peak_order"] = conf.getint("filter", "fourier_peak_order")
    kwargs["corr_sample_rule"] = [
        tuple(config.str2dur(s) for s in dt_cond.split("_"))
        for dt_cond in config.gettuple(conf, "filter", "corr_sample_rule")]
    kwargs["corr_th"] = conf.getfloat("filter", "corr_th")
    kwargs["corr_diff"] = [
        config.str2dur(diffstr)
        for diffstr in config.gettuple(conf, "filter", "corr_diff")]
    kwargs["linear_sample_rule"] = [
        tuple(config.str2dur(s) for s in dt_cond.split("_"))
        for dt_cond in config.gettuple(conf, "filter", "linear_sample_rule")]
    kwargs["linear_count"] = conf.getint("filter", "linear_count")
    kwargs["linear_th"] = conf.getfloat("filter", "linear_th")
    return LogFilter(source, **kwargs)
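# Illustrative sketch only (not part of logdag): shows how the
# "<term>_<binsize>" rule strings above are parsed. logdag's config module
# provides gettuple/str2dur; the simplified stand-ins and the option value
# below are assumptions for demonstration.
def _example_sample_rule_parsing():
    import configparser
    import datetime

    units = {"s": 1, "m": 60, "h": 3600, "d": 86400}

    def str2dur(s):
        # e.g. "7d" -> timedelta(days=7); simplified stand-in for config.str2dur
        return datetime.timedelta(seconds=int(s[:-1]) * units[s[-1]])

    conf = configparser.ConfigParser()
    conf.read_string("[filter]\nfourier_sample_rule = 1d_10m, 7d_1h\n")

    raw = conf.get("filter", "fourier_sample_rule")
    rule = [tuple(str2dur(part) for part in dt_cond.split("_"))
            for dt_cond in (s.strip() for s in raw.split(","))]
    # -> [(1 day, 10 minutes), (7 days, 1 hour)] as timedelta pairs
    return rule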
def _iter_feature_terms(self, feature_def, dt_range):
    # Avoid convolve boundary problem
    if "convolve" in feature_def["func_list"]:
        if "convolve_radius" in feature_def:
            convolve_radius = feature_def["convolve_radius"]
        else:
            # compatibility for configparser-style rule
            convolve_radius = self.conf.getint("general",
                                               "evdb_convolve_radius")
    else:
        convolve_radius = 0
    sense_offset = self._feature_bin_size * convolve_radius

    # datetimeindex.get_loc includes stop time (unlike other types!)
    # dtindex_offset removes the stop time
    dtindex_offset = self._feature_bin_size

    if "data_range" in feature_def:
        unit_term = config.str2dur(feature_def["data_range"])
        if "sense_range" in feature_def:
            unit_diff = config.str2dur(feature_def["sense_range"])
        else:
            unit_diff = unit_term
    else:
        # compatibility for configparser-style rule
        unit_term = config.getdur(self.conf, "general", "evdb_unit_term")
        unit_diff = config.getdur(self.conf, "general", "evdb_unit_diff")

    for dts, dte in dtutil.iter_term(dt_range, unit_diff):
        sense_dts = max(dt_range[0], dte - unit_term)
        yield ((dts, dte - dtindex_offset),
               (sense_dts - sense_offset, dte - dtindex_offset + sense_offset))
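# Standalone illustration (not logdag code) of the windowing yielded above:
# walk one day in 6-hour steps, pairing each index term with a sense term
# that is widened by the convolve radius and trimmed by one bin at the stop.
# iter_term below is a simplified stand-in for dtutil.iter_term, and all
# sizes are hypothetical.
def _example_feature_terms():
    import datetime

    def iter_term(dt_range, step):
        dts = dt_range[0]
        while dts < dt_range[1]:
            yield dts, min(dts + step, dt_range[1])
            dts += step

    bin_size = datetime.timedelta(minutes=10)
    sense_offset = bin_size * 2          # convolve_radius = 2 (hypothetical)
    dtindex_offset = bin_size            # drop the inclusive stop bin
    unit_term = unit_diff = datetime.timedelta(hours=6)

    day = (datetime.datetime(2024, 1, 1), datetime.datetime(2024, 1, 2))
    for dts, dte in iter_term(day, unit_diff):
        sense_dts = max(day[0], dte - unit_term)
        yield ((dts, dte - dtindex_offset),
               (sense_dts - sense_offset, dte - dtindex_offset + sense_offset))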
def test_load_asis(self):
    conf = config.open_config(arguments.DEFAULT_CONFIG, base_default=False)
    conf["general"]["evdb"] = "sql"
    conf["database_sql"]["database"] = "sqlite3"
    conf["database_amulog"]["source_conf"] = self._path_amulogconf
    conf["database_sql"]["sqlite3_filename"] = self._path_testdb
    conf["filter"]["rules"] = ""

    from logdag import dtutil
    from logdag.source import evgen_log
    w_term = self._whole_term
    size = config.str2dur("1d")
    el = evgen_log.LogEventLoader(conf)
    for dt_range in dtutil.iter_term(w_term, size):
        el.read(dt_range, dump_org=False)

    am = arguments.ArgumentManager(conf)
    am.generate(arguments.all_args)

    from logdag import makedag
    edge_cnt = 0
    for args in am:
        ldag = makedag.makedag_main(args, do_dump=False)
        edge_cnt += ldag.number_of_edges()
    assert edge_cnt > 0
def get_df(self, measure, d_tags, fields, dt_range,
           str_bin=None, func=None, fill=None, limit=None):
    if fields is None:
        fields = self.list_fields(measure)
    cursor = self._get(measure, d_tags, fields, dt_range)

    l_dt = []
    l_values = []
    for rid, row in enumerate(cursor):
        if limit is not None and rid >= limit:
            break
        dtstr, values = self._get_row_values(row)
        # obtained as naive(utc), converted into aware(local)
        l_dt.append(self.pdtimestamp(self._db.strptime(dtstr)))
        if fill is not None:
            # replace NaNs with the given fill value (fill may be 0.0)
            values = np.nan_to_num(values, nan=fill)
        l_values.append(values)

    sortidx = np.argsort(l_dt)
    sorted_l_dt = [l_dt[idx] for idx in sortidx]
    sorted_l_values = [l_values[idx] for idx in sortidx]

    if func is None:
        # use the time-sorted series so the DataFrame index is monotonic
        dtindex = self.pdtimestamps(sorted_l_dt)
        return pd.DataFrame(sorted_l_values, index=dtindex, columns=fields)
    elif func == "sum":
        assert str_bin is not None
        binsize = config.str2dur(str_bin)
        dtindex = self.pdtimestamps(
            dtutil.range_dt(dt_range[0], dt_range[1], binsize))
        d_values = {}
        if len(l_dt) == 0:
            for field in fields:
                d_values[field] = [float(0)] * len(dtindex)
        else:
            for fid, series in enumerate(zip(*sorted_l_values)):
                a_cnt = dtutil.discretize_sequential(sorted_l_dt, dt_range,
                                                     binsize,
                                                     l_dt_values=series)
                d_values[fields[fid]] = a_cnt
        return pd.DataFrame(d_values, index=dtindex)
    else:
        raise NotImplementedError
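# The func="sum" branch is effectively a fixed-bin resample over the full
# dt_range, emitting zero-filled bins even where no rows exist. A rough
# pandas-only analogue (illustrative; the real code bins with
# dtutil.discretize_sequential):
def _example_sum_binning():
    import pandas as pd

    # toy event values at irregular timestamps
    idx = pd.to_datetime(["2024-01-01 00:03", "2024-01-01 00:07",
                          "2024-01-01 00:25"])
    df = pd.DataFrame({"count": [1.0, 2.0, 1.0]}, index=idx)

    # 10-minute bins over a fixed range, empty bins filled with 0
    dtindex = pd.date_range("2024-01-01 00:00", "2024-01-01 00:30",
                            freq="10min", inclusive="left")
    binned = df.resample("10min").sum().reindex(dtindex, fill_value=0.0)
    return binned  # 00:00 -> 3.0, 00:10 -> 0.0, 00:20 -> 1.0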
def test_anonymize_restore(self):
    from amulog import __main__ as amulog_main
    from amulog import manager
    targets = amulog_main.get_targets_conf(self._amulog_conf)
    manager.process_files_online(self._amulog_conf, targets, reset_db=True)

    from amulog import anonymize
    am = anonymize.AnonymizeMapper(self._amulog_conf)
    am.anonymize(self._amulog_conf_anonymize)
    am.dump()

    conf = config.open_config(arguments.DEFAULT_CONFIG, base_default=False)
    conf["general"]["evdb"] = "sql"
    conf["database_sql"]["database"] = "sqlite3"
    conf["database_amulog"]["source_conf"] = self._path_amulogconf_anonymize
    conf["database_amulog"]["use_anonymize_mapping"] = "true"
    conf["database_amulog"]["given_amulog_database"] = "original"
    conf["database_sql"]["sqlite3_filename"] = self._path_testdb
    conf["dag"]["event_detail_cache"] = "false"
    conf["filter"]["rules"] = ""

    from logdag import dtutil
    from logdag.source import evgen_log
    w_term = self._whole_term
    size = config.str2dur("1d")
    el = evgen_log.LogEventLoader(conf)
    for dt_range in dtutil.iter_term(w_term, size):
        el.read(dt_range, dump_org=False)

    am = arguments.ArgumentManager(conf)
    am.generate(arguments.all_args)

    from logdag import makedag
    from logdag import showdag
    edge_cnt = 0
    for args in am:
        # build the DAG against the anonymized source, then restore the
        # original source so the detail view shows de-anonymized events
        conf["database_amulog"]["source_conf"] = self._path_amulogconf_anonymize
        ldag = makedag.makedag_main(args, do_dump=False)
        conf["database_amulog"]["source_conf"] = self._path_amulogconf
        showdag.show_subgraphs(ldag, "detail", load_cache=False, graph=None)
        edge_cnt += ldag.number_of_edges()
    assert edge_cnt > 0
def filter_periodic(conf, ld, l_dt, dt_range, evdef, method):
    """Return (True, remain_dt, interval) if the time series l_dt is
    periodic, otherwise (False, None, None)."""
    ret_false = False, None, None
    gid_name = conf.get("dag", "event_gid")
    p_cnt = conf.getint("filter", "pre_count")
    p_term = config.getdur(conf, "filter", "pre_term")

    # preliminary test
    if len(l_dt) < p_cnt:
        _logger.debug("time-series count too small, skip")
        return ret_false
    elif max(l_dt) - min(l_dt) < p_term:
        _logger.debug("time-series range too small, skip")
        return ret_false

    # periodicity test
    for dt_cond in config.gettuple(conf, "filter", "sample_rule"):
        dt_length, binsize = [config.str2dur(s) for s in dt_cond.split("_")]
        if (dt_range[1] - dt_range[0]) == dt_length:
            temp_l_dt = l_dt
        else:
            temp_l_dt = reload_ts(ld, evdef, dt_length, dt_range, gid_name)
        a_cnt = dtutil.discretize_sequential(temp_l_dt, dt_range,
                                             binsize, binarize=False)

        remain_dt = None
        if method == "remove":
            flag, interval = period.fourier_remove(conf, a_cnt, binsize)
        elif method == "replace":
            flag, remain_array, interval = period.fourier_replace(conf, a_cnt,
                                                                  binsize)
            if remain_array is not None:
                remain_dt = revert_event(remain_array, dt_range, binsize)
        elif method == "corr":
            flag, interval = period.periodic_corr(conf, a_cnt, binsize)
        else:
            raise NotImplementedError
        if flag:
            return flag, remain_dt, interval
    return ret_false
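# Toy analogue (NOT logdag's period module) of the "corr" periodicity test:
# an event series binned into counts is treated as periodic if its
# autocorrelation has a strong peak at some nonzero lag. The threshold and
# helper below are assumptions for illustration only.
def _example_periodic_corr():
    import numpy as np

    def toy_periodic_corr(a_cnt, binsize_sec, th=0.5):
        x = a_cnt - np.mean(a_cnt)
        acf = np.correlate(x, x, mode="full")[len(x) - 1:]
        acf = acf / acf[0]  # normalize so acf[0] == 1
        # first local maximum above threshold at a nonzero lag
        for lag in range(1, len(acf) - 1):
            if (acf[lag] > th and acf[lag] >= acf[lag - 1]
                    and acf[lag] >= acf[lag + 1]):
                return True, lag * binsize_sec
        return False, None

    # 1-minute bins over 1 hour, one event every 10 minutes
    a_cnt = np.zeros(60)
    a_cnt[::10] = 1
    return toy_periodic_corr(a_cnt, 60)  # (True, 600) -> 10-minute interval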