def template_step(lines, template_algorithm="logcluster", *args, **kwargs):
    """
    Run the templating substep selected by template_algorithm.

    Args:
        lines: input handed unchanged to the selected substep.
        template_algorithm (string): key into the substep table; either
            "logcluster" or "stringmatch".
        *args: forwarded to the selected substep.
        **kwargs: forwarded to the selected substep.

    Returns:
        Whatever the selected substep returns.
    """
    substeps = {
        "logcluster": logcluster_substep,
        "stringmatch": stringmatch_substep,
    }
    chosen_substep = substeps.get(template_algorithm)
    if not chosen_substep:
        # NOTE(review): log_exc presumably logs and raises -- confirm. If it
        # returns, the call below fails because None is not callable.
        log_exc(logger, "template_algorithm must be one of: %s" % substeps)
    return chosen_substep(lines, *args, **kwargs)
def logcluster(lines, *args, **kwargs):
    """
    Mine line templates from log lines with the logcluster algorithm.

    This function uses the logcluster algorithm (available at
    http://ristov.github.io/logcluster/) to cluster log files and mine line
    patterns. See
    http://ristov.github.io/publications/cnsm15-logcluster-web.pdf for
    additional details on the algorithm.

    The current implementation writes loglines to a file, then feeds that
    file to the logcluster command line tool (written in perl). Eventually,
    the goal is to fully translate logcluster.pl into python to eliminate
    this step.

    Behavior of this function differs depending on which of lines and
    file_path are set:
        lines AND file_path set: write lines to the file at file_path
        lines BUT NOT file_path set: write lines to a temporary file
        file_path BUT NOT lines: pass file_path directly into logcluster
        NEITHER lines NOR file_path: report the error through log_exc

    Args:
        lines: an iterable of LogLine named tuples. It is tested for
            truthiness, so None and an empty list both count as "not set".
        *args: forwarded to LogCluster.run_on_file.
        **kwargs: see Kwargs below.

    Kwargs:
        file_path (string): file handed to logcluster. When lines is also
            given, lines are written to this path first.
        support: required (must be truthy); passed to
            LogCluster.run_on_file.
        All other kwargs are forwarded to LogCluster.run_on_file
        (presumably ending up on the logcluster.pl command line).

    Returns:
        templates: a list of Template named tuples, as produced by
            LogCluster.parse_output.
    """
    file_path = kwargs.pop("file_path", None)
    fp = None
    try:
        if lines and file_path:
            logger.info("Writing lines to file: %s", file_path)
            LogCluster.write_file(lines, file_path)
        elif lines:
            fp = tempfile.NamedTemporaryFile()
            file_path = fp.name
            logger.info("Writing lines to temporary file: %s", file_path)
            LogCluster.write_file(lines, file_path)
        elif file_path:
            logger.info("Using existing lines in file: %s", file_path)
        else:
            log_exc(
                logger,
                "Must pass either argument 'lines' or keyword argument 'file_path' (or both).")

        support = kwargs.pop("support", None)
        if not support:
            log_exc(logger, "Must pass kwarg 'support'.")

        output = LogCluster.run_on_file(file_path, support, *args, **kwargs)
    finally:
        # Close in a finally block so the temporary file is released even
        # when writing, validation or the logcluster run raises.
        if fp is not None:
            # Temporary files are deleted when closed.
            logger.info("Closing file: %s", file_path)
            fp.close()

    return LogCluster.parse_output(output)
def logcluster(lines, *args, **kwargs):
    """
    Mine line templates from log lines with the logcluster algorithm.

    This function uses the logcluster algorithm (available at
    http://ristov.github.io/logcluster/) to cluster log files and mine line
    patterns. See
    http://ristov.github.io/publications/cnsm15-logcluster-web.pdf for
    additional details on the algorithm.

    The current implementation writes loglines to a file, then feeds that
    file to the logcluster command line tool (written in perl). Eventually,
    the goal is to fully translate logcluster.pl into python to eliminate
    this step.

    Behavior of this function differs depending on which of lines and
    file_path are set:
        lines AND file_path set: write lines to the file at file_path
        lines BUT NOT file_path set: write lines to a temporary file
        file_path BUT NOT lines: pass file_path directly into logcluster
        NEITHER lines NOR file_path: report the error through log_exc

    Args:
        lines: an iterable of LogLine named tuples. It is tested for
            truthiness, so None and an empty list both count as "not set".
        *args: forwarded to LogCluster.run_on_file.
        **kwargs: see Kwargs below.

    Kwargs:
        file_path (string): file handed to logcluster. When lines is also
            given, lines are written to this path first.
        support: required (must be truthy); passed to
            LogCluster.run_on_file.
        All other kwargs are forwarded to LogCluster.run_on_file
        (presumably ending up on the logcluster.pl command line).

    Returns:
        templates: a list of Template named tuples, as produced by
            LogCluster.parse_output.
    """
    file_path = kwargs.pop("file_path", None)
    fp = None
    try:
        if lines and file_path:
            logger.info("Writing lines to file: %s", file_path)
            LogCluster.write_file(lines, file_path)
        elif lines:
            fp = tempfile.NamedTemporaryFile()
            file_path = fp.name
            logger.info("Writing lines to temporary file: %s", file_path)
            LogCluster.write_file(lines, file_path)
        elif file_path:
            logger.info("Using existing lines in file: %s", file_path)
        else:
            log_exc(
                logger,
                "Must pass either argument 'lines' or keyword argument 'file_path' (or both).")

        support = kwargs.pop("support", None)
        if not support:
            log_exc(logger, "Must pass kwarg 'support'.")

        output = LogCluster.run_on_file(file_path, support, *args, **kwargs)
    finally:
        # Close in a finally block so the temporary file is released even
        # when writing, validation or the logcluster run raises.
        if fp is not None:
            # Temporary files are deleted when closed.
            logger.info("Closing file: %s", file_path)
            fp.close()

    return LogCluster.parse_output(output)
def template_step(lines, template_algorithm="logcluster", *args, **kwargs):
    """
    Run the templating substep selected by template_algorithm.

    Args:
        lines: input handed unchanged to the selected substep.
        template_algorithm (string): key into the substep table; either
            "logcluster" or "stringmatch".
        *args: forwarded to the selected substep.
        **kwargs: forwarded to the selected substep.

    Returns:
        Whatever the selected substep returns.
    """
    substeps = {
        "logcluster": logcluster_substep,
        "stringmatch": stringmatch_substep,
    }
    chosen_substep = substeps.get(template_algorithm)
    if not chosen_substep:
        # NOTE(review): log_exc presumably logs and raises -- confirm. If it
        # returns, the call below fails because None is not callable.
        log_exc(logger, "template_algorithm must be one of: %s" % substeps)
    return chosen_substep(lines, *args, **kwargs)
def event_step(gen_windows, event_algorithm="fp_growth", *args, **kwargs):
    """
    Run the event substep selected by event_algorithm, then optionally
    apply a tfidf filter to the resulting events.

    Args:
        gen_windows: input handed unchanged to the selected substep.
        event_algorithm (string): key into the substep table; one of
            "fp_growth", "paris" or "glove".
        *args: forwarded to the selected substep.
        **kwargs: see Kwargs below.

    Kwargs:
        tfidf_threshold: removed from kwargs before the substep is called.
            When it is not None, the substep's result is passed through
            tfidf_filter_events with this threshold.
        All other kwargs are forwarded to the selected substep.

    Returns:
        The substep's events, filtered when tfidf_threshold was given.
    """
    substeps = {
        "fp_growth": fp_growth_substep,
        "paris": paris_substep,
        "glove": glove_substep,
    }
    chosen_substep = substeps.get(event_algorithm)
    if not chosen_substep:
        # NOTE(review): log_exc presumably logs and raises -- confirm. If it
        # returns, the substep call below fails because None is not callable.
        log_exc(logger, "event_algorithm must be one of: %s" % substeps)

    # Pop the threshold first so it is not forwarded to the substep.
    tfidf_threshold = kwargs.pop("tfidf_threshold", None)
    events = chosen_substep(gen_windows, *args, **kwargs)

    logger.info("==========Custom post processing for sample data==========")
    if tfidf_threshold is None:
        logger.info("Skipping tfidf filter")
    else:
        # Note that calling this will reassign random event IDs.
        logger.info(
            "Applying a tfidf filter to each event's template_ids. (threshold = %s)",
            tfidf_threshold)
        events = tfidf_filter_events(events, tfidf_threshold)
    logger.info("==========End custom post processing==========")

    return events
def event_step(gen_windows, event_algorithm="fp_growth", *args, **kwargs):
    """
    Run the event substep selected by event_algorithm, then optionally
    apply a tfidf filter to the resulting events.

    Args:
        gen_windows: input handed unchanged to the selected substep.
        event_algorithm (string): key into the substep table; one of
            "fp_growth", "paris" or "glove".
        *args: forwarded to the selected substep.
        **kwargs: see Kwargs below.

    Kwargs:
        tfidf_threshold: removed from kwargs before the substep is called.
            When it is not None, the substep's result is passed through
            tfidf_filter_events with this threshold.
        All other kwargs are forwarded to the selected substep.

    Returns:
        The substep's events, filtered when tfidf_threshold was given.
    """
    substeps = {
        "fp_growth": fp_growth_substep,
        "paris": paris_substep,
        "glove": glove_substep,
    }
    chosen_substep = substeps.get(event_algorithm)
    if not chosen_substep:
        # NOTE(review): log_exc presumably logs and raises -- confirm. If it
        # returns, the substep call below fails because None is not callable.
        log_exc(logger, "event_algorithm must be one of: %s" % substeps)

    # Pop the threshold first so it is not forwarded to the substep.
    tfidf_threshold = kwargs.pop("tfidf_threshold", None)
    events = chosen_substep(gen_windows, *args, **kwargs)

    logger.info("==========Custom post processing for sample data==========")
    if tfidf_threshold is None:
        logger.info("Skipping tfidf filter")
    else:
        # Note that calling this will reassign random event IDs.
        logger.info(
            "Applying a tfidf filter to each event's template_ids. (threshold = %s)",
            tfidf_threshold)
        events = tfidf_filter_events(events, tfidf_threshold)
    logger.info("==========End custom post processing==========")

    return events