def makedag_mprocess(am, pal=1):
    import multiprocessing
    timer = common.Timer("makedag task", output=_logger)
    timer.start()
    with multiprocessing.Pool(processes=pal) as pool:
        pool.map(makedag.makedag_main, am)
    timer.stop()
def log2ts_pal(conf, dt_range, pal=1):
    from amulog import common
    timer = common.Timer(
        "make-tsdb subtask ({0[0]} - {0[1]})".format(dt_range),
        output=_logger)
    timer.start()

    gid_name = conf.get("dag", "event_gid")
    usefilter = conf.getboolean("database_ts", "usefilter")
    from amulog import log_db
    ld = log_db.LogData(conf)
    if gid_name == "ltid":
        iterobj = ld.whole_host_lt(dt_range[0], dt_range[1], "all")
    elif gid_name == "ltgid":
        iterobj = ld.whole_host_ltg(dt_range[0], dt_range[1], "all")
    else:
        raise NotImplementedError

    import multiprocessing
    td = TimeSeriesDB(conf, edit=True)
    l_args = [(conf, dt_range, gid, host) for host, gid in iterobj]
    with multiprocessing.Pool(processes=pal) as pool:
        for ret in pool.imap_unordered(log2ts_elem, l_args):
            gid, host, stat, new_l_dt, val = ret
            if new_l_dt is not None and len(new_l_dt) > 0:
                for dt in new_l_dt:
                    td.add_line(dt, gid, host)
            td.add_filterlog(dt_range, gid, host, stat, val)
        pool.close()
        pool.join()

    td.commit()
    timer.stop()
    return
def make_dag_stdin(ns):
    from . import makedag

    conf = open_logdag_config(ns)
    am = arguments.ArgumentManager(conf)
    am.init_dirs(conf)
    args = am.jobname2args(ns.argname, conf)

    timer = common.Timer("makedag task for {0}".format(ns.argname),
                         output=_logger)
    timer.start()
    makedag.makedag_main(args)
    timer.stop()
def make_evdb_log_all(ns):
    conf = open_logdag_config(ns)
    dump_org = ns.org
    dry = ns.dry

    timer = common.Timer("make-evdb-log task", output=_logger)
    timer.start()

    from . import evgen_log
    w_term = config.getterm(conf, "general", "evdb_whole_term")
    term = config.getdur(conf, "general", "evdb_unit_diff")
    el = evgen_log.LogEventLoader(conf, dry=dry)
    for dt_range in dtutil.iter_term(w_term, term):
        el.read(dt_range, dump_org=dump_org)
        timer.lap_diff("{0}".format(dt_range))
    timer.stop()
def make_tsdb(ns):
    from . import tsdb

    conf = open_logdag_config(ns)
    term = config.getdur(conf, "database_ts", "unit_term")
    diff = config.getdur(conf, "database_ts", "unit_diff")
    l_args = arguments.all_terms(conf, term, diff)

    timer = common.Timer("mk-tsdb task", output=_logger)
    timer.start()
    p = ns.parallel
    if p > 1:
        for args in l_args:
            tsdb.log2ts_pal(*args, pal=p)
    else:
        for args in l_args:
            tsdb.log2ts(*args)
    timer.stop()
def makedag_main(args, do_dump=False):
    jobname = arguments.args2name(args)
    conf, dt_range, area = args

    if conf.getboolean("dag", "pass_dag_exists"):
        import os.path
        if os.path.exists(showdag.LogDAG.dag_path(args)):
            _logger.info("dag file for job({0}) exists, passed".format(jobname))
            return None

    timer = common.Timer("makedag job({0})".format(jobname), output=_logger)
    timer.start()

    # generate time-series nodes
    # generate event set and evmap, and apply preprocessing
    input_df, evmap = log2event.makeinput(conf, dt_range, area)
    if input_df is None:
        return None
    _logger.info("{0} input shape: {1}".format(jobname, input_df.shape))
    if do_dump:
        evmap.dump(args)
    timer.lap("load-nodes")

    # generate prior knowledge
    from . import pknowledge
    prior_knowledge = pknowledge.init_prior_knowledge(conf, args, evmap)
    timer.lap("make-prior-knowledge")

    # generate dag
    graph = estimate_dag(conf, input_df, prior_knowledge)
    timer.lap("estimate-dag")
    if graph is None:
        _logger.info("job({0}) failed on causal inference".format(jobname))
        return None

    # record dag
    ldag = showdag.LogDAG(args, graph=graph, evmap=evmap)
    if do_dump:
        ldag.dump()
    timer.stop()
    return ldag
def makedag_main(args):
    jobname = arguments.args2name(args)
    conf, dt_range, area = args

    timer = common.Timer("makedag job({0})".format(jobname), output=_logger)
    timer.start()

    ci_func = conf.get("dag", "ci_func")
    binarize = is_binarize(ci_func)
    # generate event set and evmap, and apply preprocessing
    # d_input, evmap = log2event.ts2input(conf, dt_range, area, binarize)
    input_df, evmap = log2event.makeinput(conf, dt_range, area, binarize)
    _logger.info("{0} pc input shape: {1}".format(jobname, input_df.shape))
    evmap.dump(conf, args)
    timer.lap("load-nodes")

    node_ids = evmap.eids()
    g = _complete_graph(node_ids)
    if conf.getboolean("pc_prune", "do_pruning"):
        from . import prune
        n_edges_before = g.number_of_edges()
        init_graph = prune.prune_graph(g, conf, evmap)
        n_edges_after = init_graph.number_of_edges()
        _logger.info("{0} DAG edge pruning: ".format(jobname) +
                     "{0} -> {1}".format(n_edges_before, n_edges_after))
    else:
        n_edges = g.number_of_edges()
        init_graph = g
        _logger.info("{0} DAG edge candidates: ".format(jobname) +
                     "{0}".format(n_edges))
    timer.lap("prune-dag")

    graph = estimate_dag(conf, input_df, ci_func, init_graph)
    timer.lap("estimate-dag")

    # record dag
    ldag = showdag.LogDAG(args, graph)
    ldag.dump()
    timer.stop()
    return ldag
def measure_parameters(conf, targets, method):
    param_candidates = list(_get_param_candidates(method))
    n_trial = len(param_candidates)
    ps = ParameterSearcher(conf, n_trial)
    ps.load()

    from amulog import log_db
    for trial_id, params in enumerate(param_candidates):
        timer = common.Timer("measure-parameters trial{0}".format(trial_id),
                             output=_logger)
        timer.start()
        ps.init_trial(trial_id, params)
        table = lt_common.TemplateTable()
        ltgen = _init_ltgen_with_params(conf, table, method, params)

        input_lines = list(amulog.manager.iter_plines(conf, targets))
        d_tid = ltgen.process_offline(input_lines)

        iterobj = zip(input_lines, ps.tid_list_answer(), ps.iter_tpl_answer())
        for mid, (pline, tid_answer, tpl_answer) in enumerate(iterobj):
            if tid_answer is None:
                tid_trial = None
                tpl_trial = None
            else:
                tid_trial = d_tid[mid]
                if tid_trial is None:
                    tpl_trial = None
                else:
                    try:
                        tpl_trial = ltgen.get_tpl(tid_trial)
                    except Exception:
                        # debugging hook: drop into pdb if template lookup fails
                        import pdb; pdb.set_trace()
            ps.add_trial(tid_trial, tpl_trial, tid_answer, tpl_answer,
                         pline["words"])
        ps.dump_trial()
        timer.stop()
    return ps
def makedag_sprocess(am):
    timer = common.Timer("makedag task", output=_logger)
    timer.start()
    for args in am:
        makedag.makedag_main(args)
    timer.stop()
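# A minimal dispatch sketch, not part of the original module: ``makedag_all``
# is a hypothetical helper showing how a caller holding an ArgumentManager
# ``am`` could choose between the single-process and multiprocess variants
# above, mirroring the parallelism switch used in make_tsdb().
def makedag_all(am, pal=1):
    """Run every makedag job in ``am``; use a worker pool when pal > 1."""
    if pal > 1:
        makedag_mprocess(am, pal=pal)
    else:
        makedag_sprocess(am)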