Beispiel #1
0
def main():
    """Run every pipeline stage in order, driven by module-level settings.

    Stages, in call order: preprocess_step, template_step ("logcluster"),
    genapply_step, genwindow_step, event_step ("glove") and evalapply_step.
    Each intermediate result is pickled when ``write_to_pickle_file`` is
    truthy; the final timed events are always pickled. Every input (file
    names, argument lists, keyword dicts) is a name defined outside this
    function.
    """
    # Stage 1: read the raw log and apply the transforms.
    parsed_lines = preprocess_step(
        log_file, transforms_file, *read_lines_args, **read_lines_kwargs)
    if write_to_pickle_file:
        write_pickle_file(parsed_lines, loglines_file)
    # To resume from cache: parsed_lines = read_pickle_file(loglines_file)

    # Report cardinality of the processed lines (always non-verbose here).
    log_cardinality(
        parsed_lines,
        get_item=operator.attrgetter("processed"),
        item_title="Transform",
        verbose=False)

    # Stage 2: discover templates. ("stringmatch" is a WIP alternative:
    # template_step(parsed_lines, "stringmatch").)
    templates = template_step(parsed_lines, "logcluster", **logcluster_kwargs)
    if write_to_pickle_file:
        write_pickle_file(templates, gen_templates_file)
    # To resume from cache: templates = read_pickle_file(gen_templates_file)

    # Stage 3: apply the templates to the parsed lines.
    applied_lines = genapply_step(parsed_lines, templates, **genapply_kwargs)
    if write_to_pickle_file:
        write_pickle_file(applied_lines, eval_loglines_file)
    # To resume from cache: applied_lines = read_pickle_file(eval_loglines_file)

    # Stage 4: build the windows used for event generation.
    windows = genwindow_step(applied_lines, **gen_windows_kwargs)
    if write_to_pickle_file:
        write_pickle_file(windows, gen_windows_file)
    # To resume from cache: windows = read_pickle_file(gen_windows_file)

    # Stage 5: generate events. Alternatives to "glove":
    #   event_step(windows, "fp_growth", **fp_growth_kwargs)
    #   event_step(windows, "paris", **paris_kwargs)
    events = event_step(windows, "glove", **glove_kwargs)
    if write_to_pickle_file:
        write_pickle_file(events, gen_events_file)
    # To resume from cache: events = read_pickle_file(gen_events_file)

    # Disabled pretty-printer for the discovered events:
    #   by_id = {t.id: t for t in templates}
    #   listing = []
    #   for event in events:
    #       listing.append(["%s: %s" % (tid, by_id[tid])
    #                       for tid in event.template_ids])
    #   from pprint import pformat
    #   logger.info("Discovered events:")
    #   logger.info("\n"+pformat(listing))

    # Stage 6: apply the events to the templated lines. Unlike the earlier
    # stages, this result is written regardless of write_to_pickle_file.
    timed_events = evalapply_step(events, applied_lines, **eval_apply_kwargs)
    write_pickle_file(timed_events, timed_events_file)
    # To resume from cache: timed_events = read_pickle_file(timed_events_file)

    logger.info("Done!")
Beispiel #2
0
def _save_intermediate(options, obj, filename):
    """Pickle `obj` as `filename` under options.pickle_cache_dir, if enabled.

    Does nothing unless options.save_intermediate is truthy.
    """
    if options.save_intermediate:
        write_pickle_file(
            obj, os.path.join(options.pickle_cache_dir, filename))


def _collect_log_files(options):
    """Return the input paths named by options.data_file / options.data_dir.

    Raises:
        RuntimeError: if the two options together yield no path at all.
    """
    log_files = []
    if options.data_file:
        log_files.append(options.data_file)
    if options.data_dir:
        # Every entry matched by "<data_dir>/*" is treated as an input.
        log_files.extend(glob.glob(os.path.join(options.data_dir, '*')))
    # An empty list covers both "nothing specified" and "the directory
    # matched nothing", so the options need not be re-checked separately.
    if not log_files:
        raise RuntimeError('No input specified/available')
    return log_files


def _generate_events(options, templates, modelgen_windows):
    """Run the event generator selected by options.event_gen.

    Only the option attributes belonging to the selected method are read.

    Raises:
        NotImplementedError: if options.event_gen names no known method.
    """
    method = options.event_gen
    if method == 'fp-growth':
        fp_growth_kwargs = {
            "min_support": options.min_support,
            "iterations": options.iterations,
            "tfidf_threshold": options.tfidf_threshold}
        return event_step(modelgen_windows, "fp_growth", **fp_growth_kwargs)
    if method == 'paris':
        paris_kwargs = {
            "r_slack": options.r_slack,
            "num_iterations": options.num_iterations,
            "tau": options.tau}
        return event_step(modelgen_windows, "paris", **paris_kwargs)  # WIP
    if method == 'glove':
        glove_kwargs = {
            'num_components': options.num_components,
            'glove_window': options.glove_window,
            'epochs': options.epochs}
        return event_step(
            modelgen_windows,
            "glove",
            verbose=options.verbose,
            **glove_kwargs)
    if method == 'auditd':
        # The auditd generator works from the templates directly; the timed
        # templates and model-generation windows are not used.
        return auditd.event_gen(templates)
    raise NotImplementedError('%s Not implemented' % method)


def run_pipeline(options):
    """Run the whole log-to-timed-events pipeline as configured by `options`.

    Steps, in order:
      1. Collect input files from options.data_file / options.data_dir and
         run preprocess_step on each.
      2. Obtain templates: read auditd template definitions when
         options.auditd_templates_file is set, otherwise generate them with
         the method named by options.template_gen.
      3. genapply_step, then genwindow_step.
      4. Generate events with the method named by options.event_gen.
      5. evalapply_step to produce timed events.

    Each intermediate result is pickled into options.pickle_cache_dir when
    options.save_intermediate is truthy. options.verbose controls how much
    is logged; a value greater than 1 also logs a summary of every timed
    event (and sorts `timed_events` in place to do so).

    Args:
        options: object exposing the option attributes read here and in
            the helpers above.

    Raises:
        RuntimeError: if no input file is specified or found.
        NotImplementedError: for an unknown template or event generator.
    """
    # Imported up front so both verbose branches below can rely on it; it
    # used to be imported inside the first branch only.
    from pprint import pformat

    read_lines_kwargs = {'transforms_file': options.transforms_file,
                         'gettime_auditd': options.auditd,
                         'type_template_auditd': options.auditd_templates_file,
                         'ts_start_index': options.ts_start_index,
                         'ts_end_index': options.ts_end_index,
                         'ts_format': options.ts_format,
                         'skip_num_chars': options.skip_num_chars,
                         'mp': options.mp}

    log_files = _collect_log_files(options)

    loglines = []
    for log_file in log_files:
        loglines.extend(preprocess_step(log_file, **read_lines_kwargs))

    # count cardinality; print unique lines if verbose and there are actually
    # transforms to apply
    log_cardinality(loglines,
                    get_item=operator.attrgetter('processed'),
                    item_title='Transform',
                    verbose=options.verbose and options.transforms_file)

    _save_intermediate(options, loglines, "transformed_lines.pickle")

    use_auditd_templates = bool(read_lines_kwargs.get('type_template_auditd'))
    if use_auditd_templates:
        # Read in auditd template definitions
        templates = get_auditd_templates(options.auditd_templates_file)
    else:
        # Generate templates
        if options.template_gen == 'logcluster':
            logcluster_kwargs = {"support": str(options.template_support)}
            templates = template_step(
                loglines, "logcluster", **logcluster_kwargs)
        elif options.template_gen == 'stringmatch':
            templates = template_step(loglines, "stringmatch")  # WIP
        else:
            raise NotImplementedError(
                '%s Template generation method not implemented' %
                options.template_gen)

        # Only generated templates are cached and counted; templates read
        # from the auditd definitions file are not.
        _save_intermediate(options, templates, "templates.pickle")

        log_cardinality(templates,
                        item_key=operator.attrgetter('id'),
                        item_title='Template',
                        verbose=options.verbose)

    timed_templates = genapply_step(loglines, templates, **read_lines_kwargs)
    _save_intermediate(options, timed_templates, "timed_templates.pickle")

    modelgen_windows = genwindow_step(timed_templates,
                                      window_size=options.gwindow_time,
                                      tfidf_threshold=options.gtfidf_threshold)
    _save_intermediate(options, modelgen_windows, "modelgen_windows.pickle")

    gen_events = _generate_events(options, templates, modelgen_windows)
    _save_intermediate(options, gen_events, "events.pickle")

    logger.info("Discovered events: %d", len(gen_events))
    if options.verbose:
        # Print events and their templates
        if use_auditd_templates:
            # Here `templates` is indexed by its own elements, and the
            # looked-up value serves as the template id.
            template_d = {templates[template]: template
                          for template in templates}
        else:
            template_d = {template.id: template for template in templates}

        e = []
        for event in sorted(gen_events, key=operator.attrgetter('id')):
            ts = ["event_id: %s" % event.id]
            ts.extend("%s: %s" % (template_id, template_d[template_id])
                      for template_id in sorted(event.template_ids))
            e.append(ts)
        logger.info("\n" + pformat(e))

        # compute how many times each template was used (i.e. how many
        # events each template appears in)
        event_templates = (
            template_d[template_id]
            for event in gen_events
            for template_id in event.template_ids)
        log_cardinality(
            event_templates,
            item_title='EventTemplate',
            item_key=operator.attrgetter('id'),
            verbose=options.verbose)

    timed_events = evalapply_step(
        gen_events,
        timed_templates,
        window_time=options.awindow_time,
        mp=options.mp)
    _save_intermediate(options, timed_events, "timed_events.pickle")

    logger.info("Timed events: %d", len(timed_events))
    log_cardinality(
        timed_events,
        item_title='TimedEvent',
        get_item=operator.attrgetter('event_id'),
        verbose=options.verbose)
    if options.verbose > 1:
        # Print timed event summary for -vv

        # sort each event's timed_templates in ascending time order
        for te in timed_events:
            te.timed_templates.sort(key=lambda tt: tt.ts)

        if options.sort_events_key == 'time':
            # sort timed events in ascending time order (of their first
            # occurring timed_template)
            timed_event_key = lambda te: te.timed_templates[0].ts
        else:
            # sort timed events by event id, then by time order
            timed_event_key = lambda te: (
                te.event_id, te.timed_templates[0].ts)
        timed_events.sort(key=timed_event_key)

        e = [strTimedEvent(event) for event in timed_events]
        logger.info("\n" + pformat(e))

    logger.info("Done!")