Example no. 1
0
    def __init__(self,
                 depth=4,
                 sim_th=0.4,
                 max_children=100,
                 max_clusters=None,
                 extra_delimiters=(),
                 profiler: Profiler = NullProfiler()):
        """
        Attributes
        ----------
            depth : depth of all leaf nodes (nodes that contain log clusters)
            sim_th : similarity threshold - a log message whose ratio of
                similar tokens falls below this value starts a new cluster.
            max_children : max number of children of an internal node
            max_clusters : max number of tracked clusters (unlimited by default).
                Once this limit is reached, the least-recently-used cluster
                is evicted to make room for each new one.
            extra_delimiters: extra delimiters (besides whitespace) applied
                when splitting a log message into words.
        """
        # Root and leaf levels are excluded from the token prefix path length.
        self.depth = depth - 2
        self.sim_th = sim_th
        self.max_clusters = max_clusters
        self.max_children = max_children
        self.extra_delimiters = extra_delimiters
        self.profiler = profiler
        self.root_node = Node()

        # Maps cluster id (int) to LogCluster; bounded LRU cache when a
        # cluster limit was requested, plain dict otherwise.
        if max_clusters is None:
            self.id_to_cluster = {}
        else:
            self.id_to_cluster = LRUCache(maxsize=max_clusters)
        self.clusters_counter = 0
Example no. 2
0
    def __init__(self,
                 persistence_handler: PersistenceHandler = None,
                 config: TemplateMinerConfig = None):
        """
        Wrapper for Drain with persistence and masking support

        :param persistence_handler: The type of persistence to use. When None, no persistence is applied.
        :param config: Configuration object. When None, configuration is loaded from the default .ini file (if it exists).
        """
        logger.info("Starting Drain3 template miner")

        # Fall back to the on-disk configuration file when no config object
        # was supplied by the caller.
        if config is None:
            logger.info(f"Loading configuration from {config_filename}")
            config = TemplateMinerConfig()
            config.load(config_filename)
        self.config = config

        # Profiling is a no-op unless explicitly enabled in the configuration.
        self.profiler: Profiler = (
            SimpleProfiler() if self.config.profiling_enabled else NullProfiler())

        self.persistence_handler = persistence_handler

        self.drain = Drain(sim_th=self.config.drain_sim_th,
                           depth=self.config.drain_depth,
                           max_children=self.config.drain_max_children,
                           max_clusters=self.config.drain_max_clusters,
                           extra_delimiters=self.config.drain_extra_delimiters,
                           profiler=self.profiler)
        self.masker = LogMasker(self.config.masking_instructions)
        self.last_save_time = time.time()

        # Restore any previously persisted model state.
        if persistence_handler is not None:
            self.load_state()
Example no. 3
0
File: drain.py Project: IBM/Drain3
    def __init__(self,
                 depth=4,
                 sim_th=0.4,
                 max_children=100,
                 max_clusters=None,
                 extra_delimiters=(),
                 profiler: Profiler = NullProfiler(),
                 param_str="<*>",
                 parametrize_numeric_tokens=True):
        """
        Create a new Drain instance.

        :param depth: max depth levels of log clusters. Minimum is 3
            (values below 3 raise ValueError; see the check below).
            For example, for depth==4, Root is considered depth level 1.
            Token count is considered depth level 2.
            First log token is considered depth level 3.
            Log clusters below first token node are considered depth level 4.
        :param sim_th: similarity threshold - if percentage of similar tokens for a log message is below this
            number, a new log cluster will be created.
        :param max_children: max number of children of an internal node
        :param max_clusters: max number of tracked clusters (unlimited by default).
            When this number is reached, model starts replacing old clusters
            with a new ones according to the LRU policy.
        :param extra_delimiters: delimiters to apply when splitting log message into words (in addition to whitespace).
        :param profiler: profiler implementation used to time internal phases
            (the default NullProfiler disables profiling).
        :param param_str: wildcard string substituted for template parameters.
        :param parametrize_numeric_tokens: whether to treat tokens that contains at least one digit
            as template parameters.
        """
        if depth < 3:
            raise ValueError("depth argument must be at least 3")

        self.log_cluster_depth = depth
        self.max_node_depth = depth - 2  # max depth of a prefix tree node, starting from zero
        self.sim_th = sim_th
        self.max_children = max_children
        self.root_node = Node()
        self.profiler = profiler
        self.extra_delimiters = extra_delimiters
        self.max_clusters = max_clusters
        self.param_str = param_str
        self.parametrize_numeric_tokens = parametrize_numeric_tokens

        # key: int, value: LogCluster
        # Bounded LRU cache when max_clusters is set, plain dict otherwise.
        self.id_to_cluster = {} if max_clusters is None else LogClusterCache(
            maxsize=max_clusters)
        self.clusters_counter = 0
Example no. 4
0
    def __init__(self, persistence_handler: PersistenceHandler = None):
        """
        Template miner configured from the default .ini file.

        :param persistence_handler: persistence implementation to use;
            when None, state snapshots are disabled.
        """
        logger.info("Starting Drain3 template miner")
        cfg = configparser.ConfigParser()
        cfg.read(config_filename)
        self.config = cfg

        # Profiling is disabled unless the config explicitly turns it on.
        self.profiler: Profiler = NullProfiler()
        self.profiling_report_sec = cfg.getint('PROFILING', 'report_sec', fallback=60)
        if cfg.getboolean('PROFILING', 'enabled', fallback=False):
            self.profiler = SimpleProfiler()

        self.persistence_handler = persistence_handler
        # Config stores the interval in minutes; convert to seconds.
        self.snapshot_interval_seconds = cfg.getint(
            'SNAPSHOT', 'snapshot_interval_minutes', fallback=5) * 60
        self.compress_state = cfg.getboolean('SNAPSHOT', 'compress_state', fallback=True)

        # extra_delimiters is stored as a Python list literal, e.g. "['_']".
        delimiters_literal = cfg.get('DRAIN', 'extra_delimiters', fallback="[]")
        extra_delimiters = ast.literal_eval(delimiters_literal)

        self.drain = Drain(
            sim_th=cfg.getfloat('DRAIN', 'sim_th', fallback=0.4),
            depth=cfg.getint('DRAIN', 'depth', fallback=4),
            max_children=cfg.getint('DRAIN', 'max_children', fallback=100),
            max_clusters=cfg.getint('DRAIN', 'max_clusters', fallback=None),
            extra_delimiters=extra_delimiters,
            profiler=self.profiler)
        self.masker = LogMasker(cfg)
        self.last_save_time = time.time()

        # Restore any previously persisted model state.
        if persistence_handler is not None:
            self.load_state()
Example no. 5
0
 def __init__(self,
              depth=4,
              sim_th=0.4,
              max_children=100,
              extra_delimiters=(),
              profiler: Profiler = NullProfiler()):
     """
     Attributes
     ----------
         depth : depth of all leaf nodes (nodes that contain log clusters)
         sim_th : similarity threshold - a log message whose fraction of
             matching tokens is below this value starts a new log cluster.
         max_children : max number of children of an internal node
         extra_delimiters: extra delimiters (besides whitespace) applied
             when splitting a log message into words.
     """
     # Root and leaf levels are excluded from the token prefix path length.
     self.depth = depth - 2
     self.sim_th = sim_th
     self.max_children = max_children
     self.extra_delimiters = extra_delimiters
     self.profiler = profiler
     self.clusters = []
     self.root_node = Node("(ROOT)", 0)