Beispiel #1
0
def main():
    """Run the pipeline end to end using module-level configuration.

    Steps, in order: preprocess -> template generation ("logcluster") ->
    template application -> window generation -> event generation ("glove")
    -> event application.

    Each intermediate result is pickled only when ``write_to_pickle_file``
    is truthy; the final timed events are always pickled.  To resume from a
    cached result, swap a step call for ``read_pickle_file`` on the file that
    step writes.
    """
    # Step 1: preprocessing.
    parsed_lines = preprocess_step(
        log_file, transforms_file, *read_lines_args, **read_lines_kwargs)
    if write_to_pickle_file:
        write_pickle_file(parsed_lines, loglines_file)

    # Cardinality report over the "processed" attribute, never verbose here.
    log_cardinality(
        parsed_lines,
        get_item=operator.attrgetter("processed"),
        item_title="Transform",
        verbose=False)

    # Step 2: template generation ("stringmatch" is the WIP alternative).
    gen_templates = template_step(
        parsed_lines, "logcluster", **logcluster_kwargs)
    if write_to_pickle_file:
        write_pickle_file(gen_templates, gen_templates_file)

    # Step 3: apply the generated templates to the preprocessed lines.
    applied_lines = genapply_step(
        parsed_lines, gen_templates, **genapply_kwargs)
    if write_to_pickle_file:
        write_pickle_file(applied_lines, eval_loglines_file)

    # Step 4: window generation.
    windows = genwindow_step(applied_lines, **gen_windows_kwargs)
    if write_to_pickle_file:
        write_pickle_file(windows, gen_windows_file)

    # Step 5: event generation.  Alternatives that were tried here:
    # "fp_growth" with fp_growth_kwargs and "paris" with paris_kwargs.
    gen_events = event_step(windows, "glove", **glove_kwargs)
    if write_to_pickle_file:
        write_pickle_file(gen_events, gen_events_file)

    # Disabled pretty-printer, kept as an inert string literal.
    """
    # pretty print
    template_d = {template_id : template for (template_id, template) in [(template.id, template) for template in gen_templates]}
    e = []
    for event in gen_events:
        ts = []
        for template_id in event.template_ids:
            ts.append("%s: %s" % (template_id, template_d[template_id]))
        e.append(ts)
    from pprint import pformat
    logger.info("Discovered events:")
    logger.info("\n"+pformat(e))
    """

    # Step 6: event application; this result is written unconditionally.
    event_timeline = evalapply_step(
        gen_events, applied_lines, **eval_apply_kwargs)
    write_pickle_file(event_timeline, timed_events_file)

    logger.info("Done!")
Beispiel #2
0
def main():
    """Run the pipeline end to end using module-level configuration.

    Steps, in order: preprocess -> template generation ("logcluster") ->
    template application -> window generation -> event generation
    ("fp_growth") -> event application.

    Every step's result is pickled unconditionally.  To resume from a cached
    result, swap a step call for ``read_pickle_file`` on the file that step
    writes.
    """
    # Step 1: preprocessing.
    parsed_lines = preprocess_step(
        log_file, transforms_file, *read_lines_args, **read_lines_kwargs)
    write_pickle_file(parsed_lines, transformed_lines_file)

    # Step 2: template generation ("stringmatch" is the WIP alternative).
    gen_templates = template_step(
        parsed_lines, "logcluster", **logcluster_kwargs)
    write_pickle_file(gen_templates, templates_file)

    # Step 3: apply the generated templates to the preprocessed lines.
    timed_templates = genapply_step(parsed_lines, gen_templates)
    write_pickle_file(timed_templates, timed_templates_file)

    # Step 4: window generation.
    windows = genwindow_step(timed_templates, **modelgen_windows_kwargs)
    write_pickle_file(windows, modelgen_windows_file)

    # Step 5: event generation ("paris" with paris_kwargs is the WIP
    # alternative).
    gen_events = event_step(windows, "fp_growth", **fp_growth_kwargs)
    write_pickle_file(gen_events, events_file)

    # Two disabled snippets follow, kept as inert string literals: a
    # pretty-printer for the discovered events and an evaluation-window step.
    """
    # pretty print
    template_d = {template_id : template for (template_id, template) in [(template.id, template) for template in gen_templates]}
    e = []
    for event in gen_events:
        ts = []
        for template_id in event.template_ids:
            #ts.append(template_id)
            ts.append("%s: %s" % (template_id, template_d[template_id].str))
        e.append(ts)
    from pprint import pformat
    logger.info("Discovered events:")
    logger.info("\n"+pformat(e))
    """
    """
    modeleval_windows = evalwindow_step(timed_templates, window_size)
    write_pickle_file(modeleval_windows, modeleval_windows_file)
    #modeleval_windows = read_pickle_file(modeleval_windows_file)
    """

    # Step 6: event application.
    event_timeline = evalapply_step(gen_events, timed_templates, parsed_lines)
    write_pickle_file(event_timeline, timed_events_file)

    logger.info("Done!")
Beispiel #3
0
def run_pipeline(options):
    """Run the whole pipeline, configured by the attributes of ``options``.

    Stages, in order:

    1. Preprocess every input file (``options.data_file`` and/or everything
       matched by ``<options.data_dir>/*``).
    2. Obtain templates: read them with ``get_auditd_templates`` when
       ``options.auditd_templates_file`` is set, otherwise generate them with
       the method named by ``options.template_gen``.
    3. Apply the templates to the preprocessed lines (``genapply_step``).
    4. Build model-generation windows (``genwindow_step``).
    5. Generate events with the method named by ``options.event_gen``.
    6. Apply the events to the timed templates (``evalapply_step``).

    When ``options.save_intermediate`` is truthy each stage's result is
    pickled into ``options.pickle_cache_dir``.  A truthy ``options.verbose``
    logs the discovered events; a value greater than 1 also logs every timed
    event.

    Args:
        options: Object exposing the settings as attributes (presumably the
            parsed command-line namespace -- confirm against the caller).

    Raises:
        RuntimeError: If no input file was specified or found.
        NotImplementedError: If ``options.template_gen`` or
            ``options.event_gen`` names an unsupported method.
    """
    # Imported once at function scope: both the ``verbose`` and the
    # ``verbose > 1`` reporting branches below format their output with it,
    # so neither branch depends on the other having run first.
    from pprint import pformat

    read_lines_kwargs = {
        'transforms_file': options.transforms_file,
        'gettime_auditd': options.auditd,
        'type_template_auditd': options.auditd_templates_file,
        'ts_start_index': options.ts_start_index,
        'ts_end_index': options.ts_end_index,
        'ts_format': options.ts_format,
        'skip_num_chars': options.skip_num_chars,
        'mp': options.mp,
    }
    use_auditd_templates = bool(read_lines_kwargs['type_template_auditd'])

    log_files = []
    if options.data_file:
        log_files.append(options.data_file)
    if options.data_dir:
        log_files.extend(glob.glob(os.path.join(options.data_dir, '*')))
    # An empty list covers both "no input option given" and "the directory
    # matched nothing", so a separate check of the two options is redundant.
    if not log_files:
        raise RuntimeError('No input specified/available')

    loglines = []
    for log_file in log_files:
        loglines.extend(preprocess_step(log_file, **read_lines_kwargs))

    # Count cardinality; unique lines are only printed when verbose is set
    # and there actually is a transforms file to apply.
    # NOTE(review): log_cardinality is called with ``get_item=`` here and with
    # ``item_key=`` further down -- confirm its signature accepts both.
    log_cardinality(loglines,
                    get_item=operator.attrgetter('processed'),
                    item_title='Transform',
                    verbose=options.verbose and options.transforms_file)

    if options.save_intermediate:
        write_pickle_file(
            loglines,
            os.path.join(options.pickle_cache_dir,
                         "transformed_lines.pickle"))

    if use_auditd_templates:
        # Read in auditd template definitions.
        templates = get_auditd_templates(options.auditd_templates_file)
    else:
        # Generate templates.
        if options.template_gen == 'logcluster':
            templates = template_step(
                loglines, "logcluster",
                support=str(options.template_support))
        elif options.template_gen == 'stringmatch':
            templates = template_step(loglines, "stringmatch")  # WIP
        else:
            raise NotImplementedError(
                '%s Template generation method not implemented' %
                options.template_gen)

        if options.save_intermediate:
            write_pickle_file(
                templates,
                os.path.join(options.pickle_cache_dir, "templates.pickle"))

        log_cardinality(templates,
                        item_key=operator.attrgetter('id'),
                        item_title='Template',
                        verbose=options.verbose)

    timed_templates = genapply_step(loglines, templates, **read_lines_kwargs)
    if options.save_intermediate:
        write_pickle_file(
            timed_templates,
            os.path.join(options.pickle_cache_dir, "timed_templates.pickle"))

    modelgen_windows = genwindow_step(
        timed_templates,
        window_size=options.gwindow_time,
        tfidf_threshold=options.gtfidf_threshold)
    if options.save_intermediate:
        write_pickle_file(
            modelgen_windows,
            os.path.join(options.pickle_cache_dir, "modelgen_windows.pickle"))

    if options.event_gen == 'fp-growth':
        gen_events = event_step(
            modelgen_windows,
            "fp_growth",
            min_support=options.min_support,
            iterations=options.iterations,
            tfidf_threshold=options.tfidf_threshold)
    elif options.event_gen == 'paris':
        gen_events = event_step(
            modelgen_windows,
            "paris",
            r_slack=options.r_slack,
            num_iterations=options.num_iterations,
            tau=options.tau)  # WIP
    elif options.event_gen == 'glove':
        gen_events = event_step(
            modelgen_windows,
            "glove",
            verbose=options.verbose,
            num_components=options.num_components,
            glove_window=options.glove_window,
            epochs=options.epochs)
    elif options.event_gen == 'auditd':
        # Ignore timed_templates and modelgen_windows and pass the templates
        # to the auditd-specific event generator.
        gen_events = auditd.event_gen(templates)
    else:
        raise NotImplementedError('%s Not implemented' % options.event_gen)

    if options.save_intermediate:
        write_pickle_file(
            gen_events,
            os.path.join(options.pickle_cache_dir, "events.pickle"))

    logger.info("Discovered events: %d", len(gen_events))
    if options.verbose:
        # Map template id -> template so events can be printed with their
        # templates.
        if use_auditd_templates:
            # Here ``templates`` is indexed by its own items to get the id.
            template_d = {
                templates[template]: template for template in templates}
        else:
            template_d = {template.id: template for template in templates}

        e = []
        for event in sorted(gen_events, key=operator.attrgetter('id')):
            ts = ["event_id: %s" % event.id]
            ts.extend("%s: %s" % (template_id, template_d[template_id])
                      for template_id in sorted(event.template_ids))
            e.append(ts)
        logger.info("\n%s", pformat(e))

        # Compute how many times each template was used (i.e. how many
        # events each template appears in).
        event_templates = (template_d[template_id]
                           for event in gen_events
                           for template_id in event.template_ids)
        log_cardinality(event_templates,
                        item_title='EventTemplate',
                        item_key=operator.attrgetter('id'),
                        verbose=options.verbose)

    timed_events = evalapply_step(gen_events,
                                  timed_templates,
                                  window_time=options.awindow_time,
                                  mp=options.mp)
    if options.save_intermediate:
        write_pickle_file(
            timed_events,
            os.path.join(options.pickle_cache_dir, "timed_events.pickle"))

    logger.info("Timed events: %d", len(timed_events))
    log_cardinality(timed_events,
                    item_title='TimedEvent',
                    get_item=operator.attrgetter('event_id'),
                    verbose=options.verbose)
    if options.verbose > 1:
        # Print the timed event summary (second verbosity level).

        # Sort each event's timed_templates in ascending time order.
        for te in timed_events:
            te.timed_templates.sort(key=operator.attrgetter('ts'))

        def first_ts(te):
            # NOTE(review): indexing [0] raises IndexError for a timed event
            # without timed_templates -- confirm that cannot happen.
            return te.timed_templates[0].ts

        if options.sort_events_key == 'time':
            # Ascending time order of the first occurring timed_template.
            timed_events.sort(key=first_ts)
        else:
            # By event id, then by time order.
            timed_events.sort(key=lambda te: (te.event_id, first_ts(te)))

        logger.info("\n%s",
                    pformat([strTimedEvent(te) for te in timed_events]))

    logger.info("Done!")
Beispiel #4
0
def run_pipeline(options):
    """Run the whole pipeline, configured by the attributes of ``options``.

    Stages, in order:

    1. Preprocess every input file (``options.data_file`` and/or everything
       matched by ``<options.data_dir>/*``).
    2. Obtain templates: read them with ``get_auditd_templates`` when
       ``options.auditd_templates_file`` is set, otherwise generate them with
       the method named by ``options.template_gen``.
    3. Apply the templates to the preprocessed lines (``genapply_step``).
    4. Build model-generation windows (``genwindow_step``).
    5. Generate events with the method named by ``options.event_gen``.
    6. Apply the events to the timed templates (``evalapply_step``).

    When ``options.save_intermediate`` is truthy each stage's result is
    pickled into ``options.pickle_cache_dir``.  A truthy ``options.verbose``
    logs the discovered events; a value greater than 1 also logs every timed
    event.

    Args:
        options: Object exposing the settings as attributes (presumably the
            parsed command-line namespace -- confirm against the caller).

    Raises:
        RuntimeError: If no input file was specified or found.
        NotImplementedError: If ``options.template_gen`` or
            ``options.event_gen`` names an unsupported method.
    """
    # Imported once at function scope: both the ``verbose`` and the
    # ``verbose > 1`` reporting branches below format their output with it,
    # so neither branch depends on the other having run first.
    from pprint import pformat

    read_lines_kwargs = {
        'transforms_file': options.transforms_file,
        'gettime_auditd': options.auditd,
        'type_template_auditd': options.auditd_templates_file,
        'ts_start_index': options.ts_start_index,
        'ts_end_index': options.ts_end_index,
        'ts_format': options.ts_format,
        'skip_num_chars': options.skip_num_chars,
        'mp': options.mp,
    }
    use_auditd_templates = bool(read_lines_kwargs['type_template_auditd'])

    log_files = []
    if options.data_file:
        log_files.append(options.data_file)
    if options.data_dir:
        log_files.extend(glob.glob(os.path.join(options.data_dir, '*')))
    # An empty list covers both "no input option given" and "the directory
    # matched nothing", so a separate check of the two options is redundant.
    if not log_files:
        raise RuntimeError('No input specified/available')

    loglines = []
    for log_file in log_files:
        loglines.extend(preprocess_step(log_file, **read_lines_kwargs))

    # Count cardinality; unique lines are only printed when verbose is set
    # and there actually is a transforms file to apply.
    # NOTE(review): log_cardinality is called with ``get_item=`` here and with
    # ``item_key=`` further down -- confirm its signature accepts both.
    log_cardinality(loglines,
                    get_item=operator.attrgetter('processed'),
                    item_title='Transform',
                    verbose=options.verbose and options.transforms_file)

    if options.save_intermediate:
        write_pickle_file(
            loglines,
            os.path.join(options.pickle_cache_dir,
                         "transformed_lines.pickle"))

    if use_auditd_templates:
        # Read in auditd template definitions.
        templates = get_auditd_templates(options.auditd_templates_file)
    else:
        # Generate templates.
        if options.template_gen == 'logcluster':
            templates = template_step(
                loglines, "logcluster",
                support=str(options.template_support))
        elif options.template_gen == 'stringmatch':
            templates = template_step(loglines, "stringmatch")  # WIP
        else:
            raise NotImplementedError(
                '%s Template generation method not implemented' %
                options.template_gen)

        if options.save_intermediate:
            write_pickle_file(
                templates,
                os.path.join(options.pickle_cache_dir, "templates.pickle"))

        log_cardinality(templates,
                        item_key=operator.attrgetter('id'),
                        item_title='Template',
                        verbose=options.verbose)

    timed_templates = genapply_step(loglines, templates, **read_lines_kwargs)
    if options.save_intermediate:
        write_pickle_file(
            timed_templates,
            os.path.join(options.pickle_cache_dir, "timed_templates.pickle"))

    modelgen_windows = genwindow_step(
        timed_templates,
        window_size=options.gwindow_time,
        tfidf_threshold=options.gtfidf_threshold)
    if options.save_intermediate:
        write_pickle_file(
            modelgen_windows,
            os.path.join(options.pickle_cache_dir, "modelgen_windows.pickle"))

    if options.event_gen == 'fp-growth':
        gen_events = event_step(
            modelgen_windows,
            "fp_growth",
            min_support=options.min_support,
            iterations=options.iterations,
            tfidf_threshold=options.tfidf_threshold)
    elif options.event_gen == 'paris':
        gen_events = event_step(
            modelgen_windows,
            "paris",
            r_slack=options.r_slack,
            num_iterations=options.num_iterations,
            tau=options.tau)  # WIP
    elif options.event_gen == 'glove':
        gen_events = event_step(
            modelgen_windows,
            "glove",
            verbose=options.verbose,
            num_components=options.num_components,
            glove_window=options.glove_window,
            epochs=options.epochs)
    elif options.event_gen == 'auditd':
        # Ignore timed_templates and modelgen_windows and pass the templates
        # to the auditd-specific event generator.
        gen_events = auditd.event_gen(templates)
    else:
        raise NotImplementedError('%s Not implemented' % options.event_gen)

    if options.save_intermediate:
        write_pickle_file(
            gen_events,
            os.path.join(options.pickle_cache_dir, "events.pickle"))

    logger.info("Discovered events: %d", len(gen_events))
    if options.verbose:
        # Map template id -> template so events can be printed with their
        # templates.
        if use_auditd_templates:
            # Here ``templates`` is indexed by its own items to get the id.
            template_d = {
                templates[template]: template for template in templates}
        else:
            template_d = {template.id: template for template in templates}

        e = []
        for event in sorted(gen_events, key=operator.attrgetter('id')):
            ts = ["event_id: %s" % event.id]
            ts.extend("%s: %s" % (template_id, template_d[template_id])
                      for template_id in sorted(event.template_ids))
            e.append(ts)
        logger.info("\n%s", pformat(e))

        # Compute how many times each template was used (i.e. how many
        # events each template appears in).
        event_templates = (template_d[template_id]
                           for event in gen_events
                           for template_id in event.template_ids)
        log_cardinality(event_templates,
                        item_title='EventTemplate',
                        item_key=operator.attrgetter('id'),
                        verbose=options.verbose)

    timed_events = evalapply_step(gen_events,
                                  timed_templates,
                                  window_time=options.awindow_time,
                                  mp=options.mp)
    if options.save_intermediate:
        write_pickle_file(
            timed_events,
            os.path.join(options.pickle_cache_dir, "timed_events.pickle"))

    logger.info("Timed events: %d", len(timed_events))
    log_cardinality(timed_events,
                    item_title='TimedEvent',
                    get_item=operator.attrgetter('event_id'),
                    verbose=options.verbose)
    if options.verbose > 1:
        # Print the timed event summary (second verbosity level).

        # Sort each event's timed_templates in ascending time order.
        for te in timed_events:
            te.timed_templates.sort(key=operator.attrgetter('ts'))

        def first_ts(te):
            # NOTE(review): indexing [0] raises IndexError for a timed event
            # without timed_templates -- confirm that cannot happen.
            return te.timed_templates[0].ts

        if options.sort_events_key == 'time':
            # Ascending time order of the first occurring timed_template.
            timed_events.sort(key=first_ts)
        else:
            # By event id, then by time order.
            timed_events.sort(key=lambda te: (te.event_id, first_ts(te)))

        logger.info("\n%s",
                    pformat([strTimedEvent(te) for te in timed_events]))

    logger.info("Done!")
Beispiel #5
0
def run_auditd_pipeline(options):
    """Run the auditd variant of the pipeline, configured by ``options``.

    Templates are read from ``options.auditd_templates_file`` rather than
    generated, and every entry matched by ``<options.data_dir>/*`` goes
    through ``preprocess_auditd_step``.  The remaining stages are template
    application, window generation, event generation (``options.event_gen``
    must be ``'fp-growth'`` or ``'paris'``) and event application.

    When ``options.save_intermediate`` is truthy each stage's result is
    pickled into ``options.pickle_cache_dir``; a truthy ``options.verbose``
    logs the discovered events.

    Raises:
        NotImplementedError: If ``options.event_gen`` is any other value.
    """
    # Read in the auditd template definitions.
    auditd_templates = get_auditd_templates(options.auditd_templates_file)

    # auditd input skips the traditional preprocessing step.
    loglines = []
    for path in glob.glob(os.path.join(options.data_dir, '*')):
        loglines.extend(preprocess_auditd_step(path))
    if options.save_intermediate:
        write_pickle_file(
            loglines,
            os.path.join(options.pickle_cache_dir,
                         "transformed_lines.pickle"))

    timed_templates = genapply_step(
        loglines, auditd_templates, process_auditd=True)
    if options.save_intermediate:
        write_pickle_file(
            timed_templates,
            os.path.join(options.pickle_cache_dir, "timed_templates.pickle"))

    modelgen_windows = genwindow_step(timed_templates, options.window_size)
    if options.save_intermediate:
        write_pickle_file(
            modelgen_windows,
            os.path.join(options.pickle_cache_dir, "modelgen_windows.pickle"))

    if options.event_gen == 'fp-growth':
        # Per the original note: iterations = -1 returns all itemsets.
        gen_events = event_step(
            modelgen_windows, "fp_growth", min_support=0.03, iterations=-1)
    elif options.event_gen == 'paris':
        gen_events = event_step(
            modelgen_windows, "paris", r_slack=0, num_iterations=3)  # WIP
    else:
        raise NotImplementedError('%s Not implemented' % options.event_gen)

    if options.save_intermediate:
        write_pickle_file(
            gen_events,
            os.path.join(options.pickle_cache_dir, "events.pickle"))

    if options.verbose:
        # Log, per event, each of its template ids next to the template.
        id_to_template = {
            auditd_templates[name]: name for name in auditd_templates}
        report = [
            ["%s: %s" % (tid, id_to_template[tid])
             for tid in event.template_ids]
            for event in gen_events
        ]
        from pprint import pformat
        logger.info("Discovered events:")
        logger.info("\n" + pformat(report))

    # Disabled evaluation-window step, kept as an inert string literal.
    """
    modeleval_windows = evalwindow_step(timed_templates, options.window_size)
    if options.save_intermediate:
        modeleval_windows_file = os.path.join(options.pickle_cache_dir, "modeleval_windows.pickle")
        write_pickle_file(modeleval_windows, modeleval_windows_file)
    """

    timed_events = evalapply_step(gen_events, timed_templates, loglines)
    if options.save_intermediate:
        write_pickle_file(
            timed_events,
            os.path.join(options.pickle_cache_dir, "timed_events.pickle"))

    logger.info("Done!")
Beispiel #6
0
def main():
    """Run the pipeline end to end using module-level configuration.

    Steps, in order: preprocess -> template generation ("logcluster") ->
    template application -> window generation -> event generation ("glove")
    -> event application.

    Each intermediate result is pickled only when ``write_to_pickle_file``
    is truthy; the final timed events are always pickled.  To resume from a
    cached result, swap a step call for ``read_pickle_file`` on the file that
    step writes.
    """
    # Step 1: preprocessing.
    parsed_lines = preprocess_step(
        log_file, transforms_file, *read_lines_args, **read_lines_kwargs)
    if write_to_pickle_file:
        write_pickle_file(parsed_lines, loglines_file)

    # Cardinality report over the 'processed' attribute, never verbose here.
    processed_of = operator.attrgetter('processed')
    log_cardinality(
        parsed_lines, get_item=processed_of, item_title='Transform',
        verbose=False)

    # Step 2: template generation ("stringmatch" is the WIP alternative).
    gen_templates = template_step(
        parsed_lines, "logcluster", **logcluster_kwargs)
    if write_to_pickle_file:
        write_pickle_file(gen_templates, gen_templates_file)

    # Step 3: apply the generated templates to the preprocessed lines.
    applied_lines = genapply_step(
        parsed_lines, gen_templates, **genapply_kwargs)
    if write_to_pickle_file:
        write_pickle_file(applied_lines, eval_loglines_file)

    # Step 4: window generation.
    windows = genwindow_step(applied_lines, **gen_windows_kwargs)
    if write_to_pickle_file:
        write_pickle_file(windows, gen_windows_file)

    # Step 5: event generation.  Alternatives that were tried here:
    # "fp_growth" with fp_growth_kwargs and "paris" with paris_kwargs.
    gen_events = event_step(windows, "glove", **glove_kwargs)
    if write_to_pickle_file:
        write_pickle_file(gen_events, gen_events_file)

    # Disabled pretty-printer, kept as an inert string literal.
    """
    # pretty print
    template_d = {template_id : template for (template_id, template) in [(template.id, template) for template in gen_templates]}
    e = []
    for event in gen_events:
        ts = []
        for template_id in event.template_ids:
            ts.append("%s: %s" % (template_id, template_d[template_id]))
        e.append(ts)
    from pprint import pformat
    logger.info("Discovered events:")
    logger.info("\n"+pformat(e))
    """

    # Step 6: event application; this result is written unconditionally.
    event_timeline = evalapply_step(
        gen_events, applied_lines, **eval_apply_kwargs)
    write_pickle_file(event_timeline, timed_events_file)

    logger.info("Done!")