Ejemplo n.º 1
0
 def makedag_mprocess(am, pal=1):
     """Run ``makedag.makedag_main`` for every item of ``am`` in a process pool.

     Args:
         am: Iterable of job arguments; each item is handed to
             ``makedag.makedag_main`` as its single positional argument.
         pal: Number of worker processes given to ``multiprocessing.Pool``.
     """
     from multiprocessing import Pool

     task_timer = common.Timer("makedag task", output=_logger)
     task_timer.start()
     # Pool.map blocks until every job has been processed.
     with Pool(processes=pal) as workers:
         workers.map(makedag.makedag_main, am)
     task_timer.stop()
Ejemplo n.º 2
0
def log2ts_pal(conf, dt_range, pal=1):
    """Fill the time-series DB for one time range using a process pool.

    Every (host, gid) pair yielded by the log DB for ``dt_range`` becomes one
    ``log2ts_elem`` job. Results are written to ``TimeSeriesDB`` in the parent
    process as they arrive, and committed once all jobs are done.

    Args:
        conf: Configuration object; read with ``get`` / ``getboolean``.
        dt_range: Two-element sequence (start, end) of the target range.
        pal: Number of worker processes.

    Raises:
        NotImplementedError: If the "dag"/"event_gid" option is neither
            "ltid" nor "ltgid".
    """
    from amulog import common
    subtask_timer = common.Timer(
        "make-tsdb subtask ({0[0]} - {0[1]})".format(dt_range), output=_logger)
    subtask_timer.start()

    gid_name = conf.get("dag", "event_gid")
    # NOTE(review): this value is read but never consulted below.
    usefilter = conf.getboolean("database_ts", "usefilter")

    from amulog import log_db
    ld = log_db.LogData(conf)
    # Map the configured group-id kind to the LogData method that lists
    # (host, gid) pairs for the range.
    loader_names = {"ltid": "whole_host_lt", "ltgid": "whole_host_ltg"}
    if gid_name not in loader_names:
        raise NotImplementedError
    list_pairs = getattr(ld, loader_names[gid_name])
    iterobj = list_pairs(dt_range[0], dt_range[1], "all")

    import multiprocessing
    td = TimeSeriesDB(conf, edit=True)
    l_args = []
    for host, gid in iterobj:
        l_args.append((conf, dt_range, gid, host))

    with multiprocessing.Pool(processes=pal) as pool:
        results = pool.imap_unordered(log2ts_elem, l_args)
        for gid, host, stat, new_l_dt, val in results:
            has_new_lines = new_l_dt is not None and len(new_l_dt) > 0
            if has_new_lines:
                for dt in new_l_dt:
                    td.add_line(dt, gid, host)
            td.add_filterlog(dt_range, gid, host, stat, val)
        pool.close()
        pool.join()
    td.commit()
    subtask_timer.stop()
Ejemplo n.º 3
0
def make_dag_stdin(ns):
    """Run a single makedag job selected by ``ns.argname``.

    Args:
        ns: Parsed command-line namespace; ``ns.argname`` names the job and
            ``ns`` itself is passed to ``open_logdag_config``.
    """
    from . import makedag

    conf = open_logdag_config(ns)

    manager = arguments.ArgumentManager(conf)
    manager.init_dirs(conf)
    job_args = manager.jobname2args(ns.argname, conf)

    label = "makedag task for {0}".format(ns.argname)
    task_timer = common.Timer(label, output=_logger)
    task_timer.start()
    makedag.makedag_main(job_args)
    task_timer.stop()
Ejemplo n.º 4
0
def make_evdb_log_all(ns):
    """Load log events for every unit term of the configured whole term.

    Args:
        ns: Parsed command-line namespace; ``ns.org`` is forwarded as
            ``dump_org`` and ``ns.dry`` as ``dry`` to the event loader.
    """
    conf = open_logdag_config(ns)
    keep_org = ns.org
    dry_run = ns.dry

    task_timer = common.Timer("make-evdb-log task", output=_logger)
    task_timer.start()

    from . import evgen_log
    whole_term = config.getterm(conf, "general", "evdb_whole_term")
    unit_diff = config.getdur(conf, "general", "evdb_unit_diff")
    loader = evgen_log.LogEventLoader(conf, dry=dry_run)
    for unit_range in dtutil.iter_term(whole_term, unit_diff):
        loader.read(unit_range, dump_org=keep_org)
        # Record one lap per processed range, labelled with the range.
        task_timer.lap_diff("{0}".format(unit_range))

    task_timer.stop()
Ejemplo n.º 5
0
def make_tsdb(ns):
    """Build the time-series DB for every term returned by ``all_terms``.

    Uses ``tsdb.log2ts_pal`` when ``ns.parallel`` is greater than 1 and
    ``tsdb.log2ts`` otherwise.

    Args:
        ns: Parsed command-line namespace; ``ns.parallel`` is the number of
            worker processes.
    """
    from . import tsdb
    conf = open_logdag_config(ns)
    unit_term = config.getdur(conf, "database_ts", "unit_term")
    unit_diff = config.getdur(conf, "database_ts", "unit_diff")
    jobs = arguments.all_terms(conf, unit_term, unit_diff)

    task_timer = common.Timer("mk-tsdb task", output=_logger)
    task_timer.start()
    n_proc = ns.parallel
    # Decide once, before iterating, which implementation handles each job.
    use_pool = n_proc > 1
    for job in jobs:
        if use_pool:
            tsdb.log2ts_pal(*job, pal=n_proc)
        else:
            tsdb.log2ts(*job)
    task_timer.stop()
Ejemplo n.º 6
0
def makedag_main(args, do_dump=False):
    """Estimate a DAG for one job and return it as a ``showdag.LogDAG``.

    Args:
        args: Job arguments, unpacked as ``(conf, dt_range, area)``.
        do_dump: When true, dump the event map and the resulting DAG.

    Returns:
        The ``showdag.LogDAG`` built for the job, or ``None`` when the job is
        skipped (DAG file already exists and "pass_dag_exists" is set), when
        ``log2event.makeinput`` yields no input, or when ``estimate_dag``
        returns ``None``.
    """
    import os.path

    jobname = arguments.args2name(args)
    conf, dt_range, area = args

    # Skip jobs whose DAG file is already present, if configured to do so.
    skip_existing = conf.getboolean("dag", "pass_dag_exists")
    if skip_existing and os.path.exists(showdag.LogDAG.dag_path(args)):
        _logger.info("dag file for job({0}) exists, passed".format(jobname))
        return None

    job_timer = common.Timer("makedag job({0})".format(jobname),
                             output=_logger)
    job_timer.start()

    # Build the input data and event map (preprocessing happens in makeinput).
    node_df, evmap = log2event.makeinput(conf, dt_range, area)
    if node_df is None:
        return None
    shape_msg = "{0} input shape: {1}".format(jobname, node_df.shape)
    _logger.info(shape_msg)
    if do_dump:
        evmap.dump(args)
    job_timer.lap("load-nodes")

    # Build prior knowledge for the estimation step.
    from . import pknowledge
    prior = pknowledge.init_prior_knowledge(conf, args, evmap)
    job_timer.lap("make-prior-knowledge")

    # Estimate the DAG; the lap is recorded even when estimation fails.
    dag_graph = estimate_dag(conf, node_df, prior)
    job_timer.lap("estimate-dag")
    if dag_graph is None:
        _logger.info("job({0}) failed on causal inference".format(jobname))
        return None

    # Wrap the result and optionally persist it.
    result = showdag.LogDAG(args, graph=dag_graph, evmap=evmap)
    if do_dump:
        result.dump()
    job_timer.stop()
    return result
Ejemplo n.º 7
0
def makedag_main(args):
    """Estimate a DAG for one job, dump it, and return it.

    Starts from a complete graph over the event ids, optionally prunes it
    (option "pc_prune"/"do_pruning"), then passes the result to
    ``estimate_dag`` as the initial graph.

    Args:
        args: Job arguments, unpacked as ``(conf, dt_range, area)``.

    Returns:
        The dumped ``showdag.LogDAG`` for the job.
    """
    jobname = arguments.args2name(args)
    conf, dt_range, area = args

    job_timer = common.Timer("makedag job({0})".format(jobname),
                             output=_logger)
    job_timer.start()

    ci_func = conf.get("dag", "ci_func")
    binarize = is_binarize(ci_func)
    # Build the input data and event map (preprocessing happens in makeinput).
    node_df, evmap = log2event.makeinput(conf, dt_range, area, binarize)
    _logger.info("{0} pc input shape: {1}".format(jobname, node_df.shape))
    evmap.dump(conf, args)
    job_timer.lap("load-nodes")

    full_graph = _complete_graph(evmap.eids())
    if conf.getboolean("pc_prune", "do_pruning"):
        from . import prune
        # Count edges before pruning so the reduction can be reported.
        edges_before = full_graph.number_of_edges()
        init_graph = prune.prune_graph(full_graph, conf, evmap)
        edges_after = init_graph.number_of_edges()
        message = ("{0} DAG edge pruning: ".format(jobname)
                   + "{0} -> {1}".format(edges_before, edges_after))
    else:
        edge_count = full_graph.number_of_edges()
        init_graph = full_graph
        message = ("{0} DAG edge candidates: ".format(jobname)
                   + "{0}".format(edge_count))
    _logger.info(message)
    job_timer.lap("prune-dag")

    dag_graph = estimate_dag(conf, node_df, ci_func, init_graph)
    job_timer.lap("estimate-dag")

    # Wrap the result and persist it.
    result = showdag.LogDAG(args, dag_graph)
    result.dump()
    job_timer.stop()
    return result
Ejemplo n.º 8
0
def measure_parameters(conf, targets, method):
    """Run one template-generation trial per parameter candidate.

    For each candidate from ``_get_param_candidates(method)`` a fresh
    template table and generator are created, the input lines are processed
    offline, and each line's trial result is recorded in a
    ``ParameterSearcher`` next to the stored answer.

    Args:
        conf: Configuration object passed through to the helpers.
        targets: Input targets handed to ``amulog.manager.iter_plines``.
        method: Template-generation method name used to pick candidates and
            initialise the generator.

    Returns:
        The ``ParameterSearcher`` holding all recorded trials.

    Raises:
        Any exception raised by ``ltgen.get_tpl`` now propagates to the
        caller. Previously a bare ``except`` dropped into ``pdb``, which
        stalls non-interactive runs and left ``tpl_trial`` unbound or stale.
    """
    param_candidates = list(_get_param_candidates(method))
    n_trial = len(param_candidates)
    ps = ParameterSearcher(conf, n_trial)
    ps.load()

    # NOTE(review): log_db is not referenced in this function; the import is
    # kept in case other code relies on it having been loaded - confirm.
    from amulog import log_db
    for trial_id, params in enumerate(param_candidates):
        timer = common.Timer("measure-parameters trial{0}".format(
            trial_id), output=_logger)
        timer.start()
        ps.init_trial(trial_id, params)
        table = lt_common.TemplateTable()
        ltgen = _init_ltgen_with_params(conf, table, method, params)

        input_lines = list(amulog.manager.iter_plines(conf, targets))
        d_tid = ltgen.process_offline(input_lines)
        iterobj = zip(input_lines,
                      ps.tid_list_answer(),
                      ps.iter_tpl_answer())
        for mid, (pline, tid_answer, tpl_answer) in enumerate(iterobj):
            # Both trial values stay None unless an answer exists for this
            # line and the generator assigned it a template id.
            tid_trial = None
            tpl_trial = None
            if tid_answer is not None:
                tid_trial = d_tid[mid]
                if tid_trial is not None:
                    tpl_trial = ltgen.get_tpl(tid_trial)
            ps.add_trial(tid_trial, tpl_trial,
                         tid_answer, tpl_answer, pline["words"])
        ps.dump_trial()
        timer.stop()

    return ps
Ejemplo n.º 9
0
 def makedag_sprocess(am):
     """Run ``makedag.makedag_main`` for every item of ``am``, one at a time.

     Args:
         am: Iterable of job arguments; each item is handed to
             ``makedag.makedag_main`` as its single positional argument.
     """
     task_timer = common.Timer("makedag task", output=_logger)
     task_timer.start()
     for job_args in am:
         makedag.makedag_main(job_args)
     task_timer.stop()