Example #1
0
def crf_trainfile(conf, iterobj):
    """Render sampled training messages as CRF training-file text.

    Messages are sampled from ``iterobj`` with ``make_crf_train``; each one
    is converted with the CRF generator's ``trainitems`` and serialized by
    ``_items.items2str``.

    Args:
        conf: Configuration object handed to the CRF generator and sampler.
        iterobj: Iterable of log messages to sample from.

    Returns:
        str: The serialized messages, separated from each other by one
        blank line.
    """
    from . import lt_crf

    generator = lt_crf.init_ltgen_crf(conf, lt_common.TemplateTable())
    chunks = []
    for message in make_crf_train(conf, iterobj):
        chunks.append(_items.items2str(generator.trainitems(message)))
    return "\n\n".join(chunks)
Example #2
0
def make_crf_train(conf, iterobj, return_ltidlist=False):
    """Select the messages used to train the CRF model.

    The strategy is read from ``conf["log_template_crf"]["sample_method"]``:

    * ``"all"``    -- keep every item of ``iterobj``.
    * ``"random"`` -- delegate to ``train_sample_random``.
    * ``"ltgen"``  -- build template generators from the configured
      ``sample_lt_methods`` and delegate to ``train_sample_ltgen``.
    * ``"leak"``   -- delegate to ``train_sample_leak``.

    Args:
        conf: Configuration object with a ``log_template_crf`` section.
        iterobj: Iterable of candidate log messages.
        return_ltidlist: When true, also return ``lm.lt.ltid`` for every
            sampled message.

    Returns:
        The list of sampled messages, or a ``(messages, ltid_list)`` tuple
        when ``return_ltidlist`` is true.

    Raises:
        NotImplementedError: If the configured sampling method is unknown.
    """
    section = "log_template_crf"
    method = conf[section]["sample_method"]
    size = conf.getint(section, "n_sample")

    if method == "all":
        sampled = list(iterobj)
    elif method == "random":
        sampled = train_sample_random(iterobj, size)
    elif method == "leak":
        sampled = train_sample_leak(iterobj, size)
    elif method == "ltgen":
        lt_methods = config.getlist(conf, section, "sample_lt_methods")
        use_mp = conf.getboolean(section, "sample_lt_multiprocess")
        ltgen = amulog.manager.init_ltgen_methods(
            conf, lt_common.TemplateTable(), lt_methods,
            multiprocess=use_mp)
        sampled = train_sample_ltgen(iterobj, size, ltgen)
    else:
        raise NotImplementedError(
            "Invalid sampling method name {0}".format(method))

    if not return_ltidlist:
        return sampled
    return sampled, [lm.lt.ltid for lm in sampled]
Example #3
0
def make_crf_model_from_trainfile(conf, fp, output=None):
    """Train a CRF model from an existing training file.

    Args:
        conf: Configuration object passed to ``lt_crf.init_ltgen_crf``.
        fp: Training file handed to the generator's ``train_from_file``.
        output: Forwarded unchanged to ``train_from_file``.

    Returns:
        The model path reported by ``train_from_file``.
    """
    from . import lt_crf

    trainer = lt_crf.init_ltgen_crf(conf, lt_common.TemplateTable())
    trainer.init_trainer()
    model_path = trainer.train_from_file(fp, output)

    # Sanity check: the returned model path is expected to exist on disk.
    assert os.path.exists(model_path)
    message = "generate crf model {0}".format(model_path)
    _logger.info(message)
    return model_path
Example #4
0
    def _try_method(self, conf, online=True):
        """Run the configured template generators over the test log.

        Args:
            conf: Configuration object used to build the generators and to
                parse ``self._path_testlog``.
            online: When true, feed parsed lines one at a time through
                ``process_line``; otherwise pass all of them at once to
                ``process_offline`` as a dict keyed by line index.

        Returns:
            The ``TemplateTable`` that was handed to the generators.
        """
        table = lt_common.TemplateTable()
        ltgen = manager.init_ltgen_methods(conf, table)
        plines = manager.iter_plines(conf, [self._path_testlog])

        if not online:
            ltgen.process_offline(dict(enumerate(plines)))
            return table

        for pline in plines:
            ltgen.process_line(pline)
        return table
Example #5
0
def generate_lt_file(conf, fp):
    """Print every line of ``fp`` followed by its estimated template.

    Each line is stripped of trailing whitespace, parsed with
    ``logparser.LogParser`` and passed to the "crf" template generator.
    For every line three things are written to stdout: the line itself,
    the template joined with single spaces, and an empty line.

    Args:
        conf: Configuration object for the parser and template generator.
        fp: Path of the log file to read.
    """
    parser = logparser.LogParser(conf)
    ltgen = lt_common.init_ltgen(conf, lt_common.TemplateTable(), "crf")

    with open(fp, 'r') as stream:
        for raw in stream:
            text = raw.rstrip()
            # Only the word and symbol lists are needed for estimation.
            _dt, _org_host, words, symbols = parser.process_line(text)
            template = ltgen.estimate_tpl(words, symbols)
            print(text)
            print(" ".join(template))
            print("")
Example #6
0
    def test_tagging(self):
        """Train a CRF model from the fixture train file and tag a test line.

        The model is written to ``self._path_model``. The generated template
        must contain the literal word "ssh" and at least one
        ``lt_common.REPLACER`` entry.
        """
        from amulog.alg.crf import init_ltgen

        conf = config.open_config()
        conf["log_template_crf"]["model_filename"] = self._path_model

        table = lt_common.TemplateTable()
        ltgen = init_ltgen(conf, table)
        ltgen.init_trainer()
        ltgen.train_from_file(self._path_trainfile)

        tpl = ltgen.generate_tpl({"words": self.data_test})
        # assertIn reports both the member and the container on failure,
        # whereas assertTrue(x in y) only says "False is not true".
        self.assertIn("ssh", tpl)
        self.assertIn(lt_common.REPLACER, tpl)
Example #7
0
def make_crf_model(conf, iterobj, output=None,
                   return_sampled_messages=False):
    """Sample training messages and train a CRF model from them.

    Args:
        conf: Configuration object for the CRF generator and the sampler.
        iterobj: Iterable of log messages passed to ``make_crf_train``.
        output: Forwarded unchanged to the generator's ``train``.
        return_sampled_messages: When true, also return the sampled
            training messages.

    Returns:
        The model path, or ``(model_path, sampled_messages)`` when
        ``return_sampled_messages`` is true.
    """
    from . import lt_crf

    trainer = lt_crf.init_ltgen_crf(conf, lt_common.TemplateTable())
    samples = make_crf_train(conf, iterobj)

    trainer.init_trainer()
    model_path = trainer.train(samples, output)
    # Sanity check: the returned model path is expected to exist on disk.
    assert os.path.exists(model_path)
    _logger.info("generate crf model {0}".format(model_path))

    if not return_sampled_messages:
        return model_path
    return model_path, samples
Example #8
0
    def test_tagging(self):
        """Train a CRF model on the in-memory fixture and tag a test line.

        The resulting template must contain the literal word "ssh" and the
        configured variable symbol. The model file produced by training is
        removed afterwards, whether or not the assertions pass.
        """
        conf = config.open_config()
        sym = conf.get("log_template", "variable_symbol")
        table = lt_common.TemplateTable()
        ltgen = lt_crf.LTGenCRF(table, sym, conf)

        # Each training line is a sequence of whitespace-separated items.
        l_items = [[item.split() for item in data_line]
                   for data_line in self.data_train]
        ltgen.init_trainer()
        ltgen.train(l_items)

        try:
            tid, _state = ltgen.process_line(self.data_test, None)
            tpl = ltgen._table.get_template(tid)
            # assertIn reports both operands on failure, unlike
            # assertTrue(x in y).
            self.assertIn("ssh", tpl)
            self.assertIn(sym, tpl)
        finally:
            # Clean up the model file even when an assertion fails, so a
            # failing run does not leave it behind.
            common.rm(ltgen.model)
Example #9
0
def measure_parameters(conf, targets, method):
    """Run one template-generation trial per parameter candidate.

    For every candidate returned by ``_get_param_candidates(method)`` a
    fresh template generator is built, the target logs are parsed and
    processed offline, and each message's trial result is recorded next to
    the stored answer in a ``ParameterSearcher``.

    Args:
        conf: Configuration object.
        targets: Log sources passed to ``amulog.manager.iter_plines``.
        method: Name of the template generation method to evaluate.

    Returns:
        The ``ParameterSearcher`` holding all dumped trials.

    Raises:
        Exception: Whatever ``ltgen.get_tpl`` raises for a trial template
            id; the error is logged with its traceback and re-raised.
    """
    param_candidates = list(_get_param_candidates(method))
    ps = ParameterSearcher(conf, len(param_candidates))
    ps.load()

    # NOTE(review): log_db is not referenced below; kept only in case the
    # import has side effects -- confirm and remove if it does not.
    from amulog import log_db  # noqa: F401
    for trial_id, params in enumerate(param_candidates):
        timer = common.Timer("measure-parameters trial{0}".format(
            trial_id), output=_logger)
        timer.start()
        ps.init_trial(trial_id, params)
        table = lt_common.TemplateTable()
        ltgen = _init_ltgen_with_params(conf, table, method, params)

        input_lines = list(amulog.manager.iter_plines(conf, targets))
        d_tid = ltgen.process_offline(input_lines)
        iterobj = zip(input_lines,
                      ps.tid_list_answer(),
                      ps.iter_tpl_answer())
        for mid, (pline, tid_answer, tpl_answer) in enumerate(iterobj):
            # Messages without an answer are recorded with empty results.
            tid_trial = None if tid_answer is None else d_tid[mid]
            if tid_trial is None:
                tpl_trial = None
            else:
                try:
                    tpl_trial = ltgen.get_tpl(tid_trial)
                except Exception:
                    # Previously a bare ``except`` dropped into pdb here and
                    # then continued with ``tpl_trial`` unbound or stale.
                    _logger.exception(
                        "failed to get template for tid %s "
                        "(trial %s, message %s)",
                        tid_trial, trial_id, mid)
                    raise
            ps.add_trial(tid_trial, tpl_trial,
                         tid_answer, tpl_answer, pline["words"])
        ps.dump_trial()
        timer.stop()

    return ps
Example #10
0
    def __init__(self, conf, db, lttable, reset_db=False, parallel=False):
        """Read manager settings from ``conf`` and build the generators.

        Args:
            conf: Configuration object; options are read from its
                "manager" and "log_template_import" sections.
            db: Stored as ``self._db`` and passed to ``restore_ltg`` when
                ``reset_db`` is false.
            lttable: Stored as ``self._lttable`` and passed to
                ``restore_ltg`` when ``reset_db`` is false.
            reset_db: When false, template groups are restored through
                ``self._ltgroup.restore_ltg(db, lttable)``.
            parallel: When true, a ``multiprocessing.Pool`` is created
                instead of initializing the parser, host alias and
                template generator on this instance.
        """
        self._conf = conf
        self._reset_db = reset_db
        self._filename = conf["manager"]["indata_filename"]
        self._fail_output = conf["manager"]["fail_output"]
        self._online_batchsize = conf.getint("manager", "online_batchsize")
        self._online_counter = 0
        self._offline_batchsize = conf.getint("manager", "offline_batchsize")
        self._drop_undefhost = conf.getboolean("manager", "undefined_host")
        self._shuffle_import = conf.getboolean("log_template_import", "shuffle")

        self._db = db
        self._lttable = lttable
        self._table = lt_common.TemplateTable()
        # Stays None in parallel mode; only the non-parallel branch sets it.
        self._ltgen: Optional[lt_common.LTGen] = None

        self._pool = None
        if parallel:
            # A non-numeric "n_process" value falls back to the CPU count.
            tmp_n_proc = conf["manager"]["n_process"]
            if tmp_n_proc.isdigit():
                n_proc = int(tmp_n_proc)
            else:
                n_proc = os.cpu_count()
            ltgen_kwargs = {"conf": conf,
                            "table": None,  # individual table for child process
                            "shuffle": self._shuffle_import}
            from multiprocessing import Pool
            # Every worker process runs self._init_pool(ltgen_kwargs) once
            # on startup.
            self._pool = Pool(processes=n_proc,
                              initializer=self._init_pool,
                              initargs=(ltgen_kwargs,))
        else:
            # NOTE(review): self._lp and self._ha are assigned only in this
            # branch -- confirm nothing reads them in parallel mode.
            self._lp = load_log2seq(self._conf)
            self._ha = host_alias.init_hostalias(self._conf)
            # Re-reads the same option already stored in
            # self._drop_undefhost above.
            self._drop_undefhost = conf.getboolean("manager", "undefined_host")
            self._ltgen = init_ltgen_methods(self._conf, self._table)

        self._ltgroup = init_ltgroup(self._conf, self._table)
        if not self._reset_db:
            self._ltgroup.restore_ltg(self._db, self._lttable)