def tearDown(self):
    """Replace the persistence directory by an empty one and close the base directory after a test."""
    self.aminer_config = AminerConfig.load_config(self.__configFilePath)
    persistence_dir = AminerConfig.build_persistence_file_name(self.aminer_config)

    # Remove everything that was persisted during the test.
    directory_present = os.path.exists(persistence_dir)
    if directory_present:
        shutil.rmtree(persistence_dir)

    # Recreate the directory unless it is (still) present.
    directory_missing = not os.path.exists(persistence_dir)
    if directory_missing:
        os.makedirs(persistence_dir)

    SecureOSFunctions.close_base_directory()
    def __init__(self, aminer_config, anomaly_event_handlers, persistence_id='Default', target_path_list=None,
                 allow_missing_values_flag=True, ignore_list=None, output_log_line=True, auto_include_flag=False,
                 time_window_length=86400, max_time_diff=360, num_reduce_time_list=10):
        """
        Create the detector, register it as persistable component and import previously persisted data.
        @param aminer_config configuration from analysis_context.
        @param anomaly_event_handlers for handling events, e.g., print events to stdout.
        @param persistence_id name of the persistence document.
        @param target_path_list parser paths of values to be analyzed. Multiple paths mean that values are analyzed by their combined
        occurrences. When no paths are specified, the events given by the full path list are analyzed. None is replaced by an empty list.
        @param allow_missing_values_flag when set to True, the detector will also use matches, where one of the paths from
        target_path_list does not refer to an existing parsed data object.
        @param ignore_list list of paths that are not considered for correlation, i.e., events that contain one of these paths are
        omitted. None is replaced by an empty list as None is not iterable.
        @param output_log_line specifies whether the full parsed log atom should be provided in the output.
        @param auto_include_flag specifies whether new frequency measurements override ground truth frequencies.
        @param time_window_length length of the time window for which the appearances of log lines are identified with each other.
        A value of 86400 specifies a day and 604800 a week.
        @param max_time_diff maximal time difference in seconds for new times. If the difference of the new time to all previous times
        is greater than max_time_diff the new time is considered an anomaly.
        @param num_reduce_time_list number of new time entries appended to the time list, before the list is being reduced.
        """
        # Configuration and event handling.
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.output_log_line = output_log_line
        self.next_persist_time = None

        # Behaviour switches.
        self.auto_include_flag = auto_include_flag
        self.allow_missing_values_flag = allow_missing_values_flag

        # Path selections; a missing list is stored as an empty list.
        self.ignore_list = [] if ignore_list is None else ignore_list
        self.target_path_list = [] if target_path_list is None else target_path_list

        # Timing parameters.
        self.time_window_length = time_window_length
        self.max_time_diff = max_time_diff
        self.num_reduce_time_list = num_reduce_time_list

        # Maps the tuple of values of the paths of target_path_list to the list of all times at which the tuple appeared.
        self.appeared_time_list = {}
        # Maps the tuple of values of the paths of target_path_list to the number of times appended since the last reduction.
        self.counter_reduce_time_intervals = {}

        # Determine the persistence location and register this component.
        self.persistence_id = persistence_id
        detector_name = self.__class__.__name__
        self.persistence_file_name = AminerConfig.build_persistence_file_name(aminer_config, detector_name, persistence_id)
        PersistenceUtil.add_persistable_component(self)

        # Import the persisted state after the registration.
        self.load_persistence_data()
    def __init__(self, aminer_config, ruleset, anomaly_event_handlers, persistence_id='Default', output_log_line=True):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        @param aminer_config configuration providing the persistence period and used to build the persistence file name.
        @param ruleset a list of MatchRule rules with appropriate CorrelationRules attached as actions.
        @param anomaly_event_handlers handlers stored on the instance for event handling.
        @param persistence_id name of the persistence document.
        @param output_log_line flag stored on the instance.
        """
        self.aminer_config = aminer_config
        self.event_classification_ruleset = ruleset
        self.anomaly_event_handlers = anomaly_event_handlers

        # The first persist time lies one persistence period (configured or default) after the construction time.
        creation_time = time.time()
        persistence_period = self.aminer_config.config_properties.get(
            AminerConfig.KEY_PERSISTENCE_PERIOD, AminerConfig.DEFAULT_PERSISTENCE_PERIOD)
        self.next_persist_time = creation_time + persistence_period

        self.persistence_id = persistence_id
        self.output_log_line = output_log_line
        self.last_log_atom = None

        # Collect every distinct rule referenced as artefact A or B by the actions of the classification rules.
        correlation_rules = set()
        for classification_rule in self.event_classification_ruleset:
            action = classification_rule.match_action
            if action.artefact_a_rules is not None:
                correlation_rules |= set(action.artefact_a_rules)
            if action.artefact_b_rules is not None:
                correlation_rules |= set(action.artefact_b_rules)
        self.event_correlation_ruleset = list(correlation_rules)

        detector_name = self.__class__.__name__
        self.persistence_file_name = AminerConfig.build_persistence_file_name(aminer_config, detector_name, persistence_id)
        PersistenceUtil.add_persistable_component(self)
Beispiel #4
0
    def __init__(self, aminer_config, target_path_list, anomaly_event_handlers, persistence_id='Default',
                 allow_missing_values_flag=False, auto_include_flag=False, output_log_line=True):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        @param aminer_config configuration used to build the persistence file name.
        @param target_path_list the list of values to extract from each match to create the value combination to be checked.
        @param anomaly_event_handlers handlers stored on the instance for event handling.
        @param persistence_id name of the persistence document.
        @param allow_missing_values_flag when set to True, the detector will also use matches, where one of the paths from
        target_path_list does not refer to an existing parsed data object.
        @param auto_include_flag when set to True, this detector will report a new value only the first time before including it
        in the known values set automatically.
        @param output_log_line flag stored on the instance.
        """
        # Configuration and event handling.
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.persistence_id = persistence_id
        self.output_log_line = output_log_line

        # Detection parameters.
        self.target_path_list = target_path_list
        self.allow_missing_values_flag = allow_missing_values_flag
        self.auto_include_flag = auto_include_flag

        # Counters and buffers used for logging statistics.
        self.log_success = 0
        self.log_total = 0
        self.log_learned_path_value_combos = 0
        self.log_new_learned_values = []

        detector_name = self.__class__.__name__
        self.persistence_file_name = AminerConfig.build_persistence_file_name(aminer_config, detector_name, persistence_id)
        self.next_persist_time = None

        # Start with an empty set, then import the persisted state and finally register the component.
        self.known_values_set = set()
        self.load_persistence_data()
        PersistenceUtil.add_persistable_component(self)
    def __init__(self, aminer_config, anomaly_event_handlers, persistence_id='Default', auto_include_flag=False,
                 output_log_line=True):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        @param aminer_config configuration used to build the persistence file name.
        @param anomaly_event_handlers handlers stored on the instance for event handling.
        @param persistence_id name of the persistence document.
        @param auto_include_flag flag stored on the instance.
        @param output_log_line flag stored on the instance.
        """
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.persistence_id = persistence_id
        self.auto_include_flag = auto_include_flag
        self.output_log_line = output_log_line
        self.next_persist_time = None

        # Counters and buffers used for logging statistics.
        self.log_success = 0
        self.log_total = 0
        self.log_learned_paths = 0
        self.log_new_learned_paths = []

        detector_name = self.__class__.__name__
        self.persistence_file_name = AminerConfig.build_persistence_file_name(aminer_config, detector_name, persistence_id)
        PersistenceUtil.add_persistable_component(self)

        # The known paths are the persisted ones, or an empty set when nothing was persisted.
        stored_paths = PersistenceUtil.load_json(self.persistence_file_name)
        if stored_paths is not None:
            self.known_path_set = set(stored_paths)
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).debug('%s loaded persistence data.', detector_name)
        else:
            self.known_path_set = set()
    def __init__(self, aminer_config, anomaly_event_handlers, id_path_list, target_path_list=None, persistence_id='Default',
                 auto_include_flag=False, output_log_line=True, ignore_list=None, constraint_list=None):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        @param aminer_config configuration from analysis_context.
        @param anomaly_event_handlers for handling events, e.g., print events to stdout.
        @param id_path_list to specify group identifiers for which numeric ranges should be learned.
        @param target_path_list parser paths of values to be analyzed. Multiple paths mean that all values occurring in these paths
        are considered for value range generation.
        @param persistence_id name of the persistence document.
        @param auto_include_flag specifies whether value ranges should be extended when values outside of ranges are observed.
        @param output_log_line specifies whether the full parsed log atom should be provided in the output.
        @param ignore_list list of paths that are not considered for analysis, i.e., events that contain one of these paths are
        omitted. Stored as a set, or as an empty list when None is passed.
        @param constraint_list list of paths that have to be present in the log atom to be analyzed. Stored as a set, or as an
        empty list when None is passed.
        """
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.persistence_id = persistence_id
        self.id_path_list = id_path_list
        self.target_path_list = target_path_list
        self.auto_include_flag = auto_include_flag
        self.output_log_line = output_log_line
        self.next_persist_time = None

        self.constraint_list = [] if constraint_list is None else set(constraint_list)
        self.ignore_list = [] if ignore_list is None else set(ignore_list)

        self.log_total = 0
        self.log_success = 0

        # Lower and upper bounds, both keyed by the tuple of identifier values.
        self.ranges_min = {}
        self.ranges_max = {}

        # Persisted data consists of min and max values for each identifier, i.e.,
        # [["min", [<id1, id2, ...>], <min_value>], ["max", [<id1, id2, ...>], <max_value>]]
        detector_name = self.__class__.__name__
        self.persistence_file_name = AminerConfig.build_persistence_file_name(aminer_config, detector_name, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        persisted_ranges = PersistenceUtil.load_json(self.persistence_file_name)
        if persisted_ranges is None:
            return
        for entry in persisted_ranges:
            # Entries with a tag other than 'min' or 'max' are skipped.
            if entry[0] == 'min':
                self.ranges_min[tuple(entry[1])] = float(entry[2])
            elif entry[0] == 'max':
                self.ranges_max[tuple(entry[1])] = float(entry[2])
Beispiel #7
0
 def save_current_config(self, analysis_context, destination_file):
     """
     Write the current live config to a file, keep the resulting message as remote control response and log it.
     @param analysis_context the analysis context of the aminer.
     @param destination_file the path to the file in which the config is saved.
     """
     result_message = AminerConfig.save_config(analysis_context, destination_file)
     self.REMOTE_CONTROL_RESPONSE = result_message
     debug_logger = logging.getLogger(DEBUG_LOG_NAME)
     debug_logger.info(result_message)
    def test17_demo_yaml_config_equals_python_config(self):
        """Build the analysis pipeline from the yaml and the python demo config and check that both contexts match."""
        # Load YamlConfig.py as a module and let it read the yaml demo configuration.
        yaml_spec = importlib.util.spec_from_file_location('aminer_config', '/usr/lib/logdata-anomaly-miner/aminer/YamlConfig.py')
        aminer_config = importlib.util.module_from_spec(yaml_spec)
        yaml_spec.loader.exec_module(aminer_config)
        aminer_config.load_yaml('demo/aminer/demo-config.yml')
        yml_context = AnalysisContext(aminer_config)
        yml_context.build_analysis_pipeline()

        # Build the same pipeline from the python demo configuration.
        aminer_config = AminerConfig.load_config('demo/aminer/demo-config.py')
        py_context = AnalysisContext(aminer_config)
        py_context.build_analysis_pipeline()

        import copy
        # Drop the properties that are excluded from the comparison with the python config properties.
        yml_config_properties = copy.deepcopy(yml_context.aminer_config.config_properties)
        for property_name in ('Parser', 'Input', 'Analysis', 'EventHandlers', 'LearnMode', 'SuppressNewMatchPathDetector'):
            del yml_config_properties[property_name]

        # remove SimpleUnparsedAtomHandler, VerboseUnparsedAtomHandler and NewMatchPathDetector as they are added by the YamlConfig.
        py_registered_components = copy.copy(py_context.registered_components)
        for component_id in (0, 1, 2, 10):
            del py_registered_components[component_id]
        yml_registered_components = copy.copy(yml_context.registered_components)
        for component_id in (0, 1):
            del yml_registered_components[component_id]
        # Re-key the remaining python components with consecutive numbers starting at 1, keeping their order.
        py_registered_components = dict(enumerate(py_registered_components.values(), start=1))

        py_registered_components_by_name = copy.copy(py_context.registered_components_by_name)
        for component_name in ('SimpleUnparsedHandler', 'VerboseUnparsedHandler', 'NewMatchPath', 'SimpleMonotonicTimestampAdjust'):
            del py_registered_components_by_name[component_name]
        yml_registered_components_by_name = copy.copy(yml_context.registered_components_by_name)
        for component_name in ('DefaultNewMatchPathDetector', 'AtomFilter'):
            del yml_registered_components_by_name[component_name]

        self.assertEqual(yml_config_properties, py_context.aminer_config.config_properties)
        # there actually is no easy way to compare aminer components as they do not implement the __eq__ method.
        self.assertEqual(len(yml_registered_components), len(py_registered_components))
        for i in range(2, len(yml_registered_components)):  # skipcq: PTC-W0060
            yml_component_type = type(yml_registered_components[i])
            self.assertEqual(yml_component_type, type(py_registered_components[i]))
        self.assertEqual(yml_registered_components_by_name.keys(), py_registered_components_by_name.keys())
        for name in yml_registered_components_by_name:
            yml_component_type = type(yml_registered_components_by_name[name])
            self.assertEqual(yml_component_type, type(py_registered_components_by_name[name]))
        self.assertEqual(len(yml_context.real_time_triggered_components), len(py_context.real_time_triggered_components))

        # the atom_handler_list is not equal as the python version uses a SimpleMonotonicTimestampAdjust.
        yml_factory = yml_context.atomizer_factory
        py_factory = py_context.atomizer_factory
        self.assertEqual(yml_factory.default_timestamp_paths, py_factory.default_timestamp_paths)
        self.assertEqual(type(yml_factory.event_handler_list), type(py_factory.event_handler_list))
    def __init__(self, aminer_config, anomaly_event_handlers, timestamp_path, analyze_path_list, min_bin_elements, min_bin_time,
                 debug_mode=False, persistence_id='Default', output_log_line=True):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        @param aminer_config configuration used to build the persistence file name.
        @param anomaly_event_handlers handlers stored on the instance for event handling.
        @param timestamp_path if not None, use this path value for timestamp based bins.
        @param analyze_path_list list of match paths to analyze in this detector.
        @param min_bin_elements evaluate the latest bin only after at least that number of elements was added to it.
        @param min_bin_time evaluate the latest bin only when the first element is received after min_bin_time has elapsed.
        @param debug_mode if true, generate an analysis report even when average of last bin was within expected range.
        @param persistence_id name of the persistence document.
        @param output_log_line flag stored on the instance.
        """
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.timestamp_path = timestamp_path
        self.min_bin_elements = min_bin_elements
        self.min_bin_time = min_bin_time
        self.debug_mode = debug_mode
        self.persistence_id = persistence_id
        self.output_log_line = output_log_line
        self.next_persist_time = None

        detector_name = self.__class__.__name__
        self.persistence_file_name = AminerConfig.build_persistence_file_name(aminer_config, detector_name, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        persistence_data = PersistenceUtil.load_json(self.persistence_file_name)

        # One (path, values) pair per analyzed path; the value lists are filled from the persisted data below.
        self.stat_data = [(analyzed_path, []) for analyzed_path in analyze_path_list]
        if persistence_data is None:
            return

        for record in persistence_data:
            if isinstance(record, str):
                # A record given as string is split manually into its path and its values part.
                # NOTE(review): values remains a string here, so the loop below appends single characters - confirm intent.
                parts = record.strip('[').strip(']').split(',', 2)
                path = parts[0].strip('"')
                values = parts[1].strip(' ').strip('[').strip(']')
            else:
                path = record[0]
                values = record[1]

            # Position of the entry for this path; equals len(self.stat_data) when the path is not analyzed.
            index = len(self.stat_data)
            for position, (known_path, _) in enumerate(self.stat_data):
                if known_path == path:
                    index = position
                    break

            # NOTE(review): for a path missing in analyze_path_list the first append raises an IndexError - confirm intent.
            for value in values:
                self.stat_data[index][1].append(value)
    def __init__(self,
                 aminer_config,
                 anomaly_event_handlers,
                 id_path_list=None,
                 target_path_list=None,
                 seq_len=3,
                 persistence_id='Default',
                 auto_include_flag=False,
                 output_log_line=True,
                 ignore_list=None,
                 constraint_list=None):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        @param aminer_config configuration from analysis_context.
        @param anomaly_event_handlers for handling events, e.g., print events to stdout.
        @param id_path_list one or more paths that specify the trace of the sequence detection, i.e., incorrect sequences that are
        generated by interleaved events can be avoided when event sequence identifiers are available. None is replaced by [].
        @param target_path_list parser paths of values to be analyzed. Multiple paths mean that values are analyzed by their combined
        occurrences. When no paths are specified, the events given by the full path list are analyzed.
        @param seq_len the length of the sequences to be learned (larger lengths increase precision, but may overfit the data).
        @param persistence_id name of the persistence document.
        @param auto_include_flag specifies whether new frequency measurements override ground truth frequencies.
        @param output_log_line specifies whether the full parsed log atom should be provided in the output.
        @param ignore_list list of paths that are not considered for analysis, i.e., events that contain one of these paths are
        omitted. None is replaced by [] as None is not iterable.
        @param constraint_list list of paths that have to be present in the log atom to be analyzed. None is replaced by [].
        """
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.persistence_id = persistence_id
        self.target_path_list = target_path_list
        self.auto_include_flag = auto_include_flag
        self.output_log_line = output_log_line
        self.next_persist_time = None

        # Optional path lists are stored as empty lists when they are not given.
        self.id_path_list = [] if id_path_list is None else id_path_list
        self.constraint_list = [] if constraint_list is None else constraint_list
        self.ignore_list = [] if ignore_list is None else ignore_list

        self.seq_len = seq_len
        self.sequences = set()
        self.current_sequences = {}

        self.log_total = 0
        self.log_success = 0
        self.log_learned = 0
        self.log_learned_sequences = []

        detector_name = self.__class__.__name__
        self.persistence_file_name = AminerConfig.build_persistence_file_name(aminer_config, detector_name, persistence_id)
        PersistenceUtil.add_persistable_component(self)

        # Persisted data contains lists of sequences, i.e., [[<seq1_elem1>, <seq1_elem2>], [<seq2_elem1, ...], ...]
        # Thereby, sequence elements may be tuples, i.e., combinations of values, or paths that define events.
        stored_sequences = PersistenceUtil.load_json(self.persistence_file_name)
        if stored_sequences is None:
            return
        for stored_sequence in stored_sequences:
            # Convert the nested lists to nested tuples before adding them to the set.
            self.sequences.add(tuple(tuple(element) for element in stored_sequence))
        logging.getLogger(AminerConfig.DEBUG_LOG_NAME).debug('%s loaded persistence data.', detector_name)
    def __init__(self,
                 aminer_config,
                 target_path_list,
                 anomaly_event_handlers,
                 persistence_id='Default',
                 auto_include_flag=False,
                 default_interval=3600,
                 realert_interval=86400,
                 output_log_line=True):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        @param aminer_config configuration used to build the persistence file name.
        @param target_path_list to extract a source identification value from each logatom. Persisted entries whose stored
        target paths differ from this value are not loaded.
        @param anomaly_event_handlers handlers stored on the instance for event handling.
        @param persistence_id name of the persistence document.
        @param auto_include_flag flag stored on the instance.
        @param default_interval interval that is enforced on every loaded persistence entry.
        @param realert_interval interval stored on the instance.
        @param output_log_line flag stored on the instance.
        """
        self.target_path_list = target_path_list
        self.anomaly_event_handlers = anomaly_event_handlers
        self.auto_include_flag = auto_include_flag
        self.default_interval = default_interval
        self.realert_interval = realert_interval
        # This timestamp is compared with timestamp values from log atoms for activation of alerting logic. The first timestamp
        # from logs above this value will trigger alerting.
        self.next_check_timestamp = 0
        self.last_seen_timestamp = 0
        self.next_persist_time = None
        self.output_log_line = output_log_line
        self.aminer_config = aminer_config
        self.persistence_id = persistence_id

        self.log_success = 0
        self.log_total = 0
        self.log_learned_values = 0
        self.log_new_learned_values = []

        self.persistence_file_name = AminerConfig.build_persistence_file_name(
            aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        persistence_data = PersistenceUtil.load_json(
            self.persistence_file_name)
        self.expected_values_dict = {}
        if persistence_data is not None:
            # NOTE(review): entries look like [<timestamp>, <interval>, <next check time>, <target paths>] - confirm against the
            # code that writes the persistence data.
            for key, value in persistence_data.items():
                # Skip entries that were persisted for other target paths. The former elif branch repeated the
                # "target_path_list is not None" test of the preceding if and could therefore never be reached.
                if self.target_path_list is not None and value[3] != self.target_path_list:
                    continue
                # Apply the currently configured interval to entries that were persisted with a different one.
                if value[1] != default_interval:
                    value[1] = default_interval
                    value[2] = value[0] + default_interval
                self.expected_values_dict[key] = value
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).debug(
                '%s loaded persistence data.', self.__class__.__name__)
        self.analysis_string = 'Analysis.%s'
 def setUp(self):
     """Create config, analysis context and output handler, empty the persistence directory and initialize the loggers."""
     # Reset the list of persistable components.
     PersistenceUtil.persistable_components = []
     self.aminer_config = AminerConfig.load_config(self.__configFilePath)
     self.analysis_context = AnalysisContext(self.aminer_config)
     self.output_stream = StringIO()
     self.stream_printer_event_handler = StreamPrinterEventHandler(self.analysis_context, self.output_stream)

     # Replace an existing persistence directory by an empty one.
     base_dir = AminerConfig.build_persistence_file_name(self.aminer_config)
     if os.path.exists(base_dir):
         shutil.rmtree(base_dir)
     if not os.path.exists(base_dir):
         os.makedirs(base_dir)

     # The loggers are initialized with the uid and gid of the 'aminer' account.
     aminer_uid = getpwnam('aminer').pw_uid
     aminer_gid = getgrnam('aminer').gr_gid
     initialize_loggers(self.aminer_config, aminer_uid, aminer_gid)

     # The base directory is passed as bytes.
     base_dir_bytes = base_dir.encode() if isinstance(base_dir, str) else base_dir
     open_flags = os.O_RDONLY | os.O_DIRECTORY | os.O_PATH
     SecureOSFunctions.secure_open_base_directory(base_dir_bytes, open_flags)
     PersistenceUtil.SKIP_PERSISTENCE_ID_WARNING = True
Beispiel #13
0
    def __init__(self, aminer_config, anomaly_event_handlers, parallel_check_count, persistence_id='Default',
                 record_count_before_event=10000, output_log_line=True, use_path_match=True, use_value_match=True,
                 min_rule_attributes=1, max_rule_attributes=5):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        @param aminer_config configuration used to build the persistence file name.
        @param anomaly_event_handlers handlers stored on the instance for event handling.
        @param parallel_check_count number of rule detection checks to run in parallel.
        @param persistence_id name of the persistence document.
        @param record_count_before_event number of events used to calculate statistics (i.e., window size)
        @param output_log_line flag stored on the instance.
        @param use_path_match if true rules are built based on path existence
        @param use_value_match if true rules are built based on actual values
        @param min_rule_attributes minimum number of attributes forming a rule
        @param max_rule_attributes maximum number of attributes forming a rule
        """
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.persistence_id = persistence_id
        self.output_log_line = output_log_line

        # Rule generation parameters.
        self.parallel_check_count = parallel_check_count
        self.min_rule_attributes = min_rule_attributes
        self.max_rule_attributes = max_rule_attributes
        self.use_path_match = use_path_match
        self.use_value_match = use_value_match
        self.record_count_before_event = record_count_before_event

        # Runtime state.
        self.last_timestamp = 0.0
        self.last_unhandled_match = None
        self.next_persist_time = None
        self.total_records = 0

        detector_name = self.__class__.__name__
        self.persistence_file_name = AminerConfig.build_persistence_file_name(aminer_config, detector_name, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
        if persistence_data is not None:
            # NOTE(review): feature_list, event_count_table and event_delta_table are not assigned in this case - confirm that
            # they are set elsewhere before they are used.
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).debug('%s loaded persistence data.', detector_name)
        else:
            # Without persisted data the detector starts with an empty feature list and zero-filled tables.
            self.feature_list = []
            self.event_count_table = [0] * parallel_check_count * parallel_check_count * 2
            self.event_delta_table = [0] * parallel_check_count * parallel_check_count * 2
Beispiel #14
0
    def __init__(self, aminer_config, target_path_list, anomaly_event_handlers, id_path_list, min_allowed_time_diff,
                 persistence_id='Default', allow_missing_values_flag=False, auto_include_flag=False, output_log_line=True):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        @param aminer_config configuration used to build the persistence file name.
        @param target_path_list the list of values to extract from each match to create the value combination to be checked.
        @param anomaly_event_handlers handlers stored on the instance for event handling.
        @param id_path_list the list of paths where id values can be stored in all relevant log event types.
        @param min_allowed_time_diff the minimum amount of time in seconds after the first appearance of a log atom with a specific
        id that is waited for other log atoms with the same id to occur. The maximum possible time to keep an incomplete combo
        is 2*min_allowed_time_diff
        @param persistence_id name of the persistence document.
        @param allow_missing_values_flag when set to True, the detector will also use matches, where one of the paths from
        target_path_list does not refer to an existing parsed data object.
        @param auto_include_flag when set to True, this detector will report a new value only the first time before including it
        in the known values set automatically.
        @param output_log_line flag stored on the instance.
        """
        # Configuration and event handling.
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.persistence_id = persistence_id
        self.output_log_line = output_log_line

        # Detection parameters.
        self.target_path_list = target_path_list
        self.id_path_list = id_path_list
        self.min_allowed_time_diff = min_allowed_time_diff
        self.allow_missing_values_flag = allow_missing_values_flag
        self.auto_include_flag = auto_include_flag

        # Counters and buffers used for logging statistics.
        self.log_success = 0
        self.log_total = 0
        self.log_learned_path_value_combos = 0
        self.log_new_learned_values = []

        detector_name = self.__class__.__name__
        self.persistence_file_name = AminerConfig.build_persistence_file_name(aminer_config, detector_name, persistence_id)
        self.next_persist_time = None

        # Import the persisted state first and register the component afterwards.
        self.load_persistence_data()
        PersistenceUtil.add_persistable_component(self)

        # These attributes are assigned only after the persistence data was loaded.
        self.id_dict_current = {}
        self.id_dict_old = {}
        self.next_shift_time = None
    def __init__(self, aminer_config, histogram_defs, report_interval, report_event_handlers, reset_after_report_flag=True,
                 persistence_id='Default', output_log_line=True):
        """
        Initialize the analysis component.
        @param aminer_config configuration used to build the persistence file name.
        @param histogram_defs is a list of tuples containing the target property path to analyze and the BinDefinition to apply for
        binning.
        @param report_interval delay in seconds between creation of two reports. The parameter is applied to the parsed record data
        time, not the system time. Hence reports can be delayed when no data is received.
        @param report_event_handlers handlers stored on the instance for reporting.
        @param reset_after_report_flag flag stored on the instance.
        @param persistence_id name of the persistence document.
        @param output_log_line flag stored on the instance.
        @raise Exception when persisted data exists, as reading it is not supported.
        """
        self.last_report_time = None
        self.next_report_time = 0.0

        # One HistogramData object per (path, bin definition) pair.
        self.histogram_data = [HistogramData(path, bin_definition) for (path, bin_definition) in histogram_defs]

        self.aminer_config = aminer_config
        self.report_interval = report_interval
        self.report_event_handlers = report_event_handlers
        self.reset_after_report_flag = reset_after_report_flag
        self.persistence_id = persistence_id
        self.output_log_line = output_log_line
        self.next_persist_time = None

        detector_name = self.__class__.__name__
        self.persistence_file_name = AminerConfig.build_persistence_file_name(aminer_config, detector_name, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
        if persistence_data is None:
            return

        # Existing persistence data cannot be processed, so the construction is aborted.
        msg = 'No data reading, def merge yet'
        logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
        raise Exception(msg)
Beispiel #16
0
    def __init__(self, program_name, aminer_config):
        """
        Create the analysis context, install the shutdown signal handlers and register this object as time triggered component.
        @param program_name name that is used in the message emitted when a shutdown signal is caught.
        @param aminer_config configuration the AnalysisContext is created from.
        """
        import signal

        self.program_name = program_name
        self.analysis_context = AnalysisContext(aminer_config)
        self.run_analysis_loop_flag = True
        self.log_streams_by_name = {}

        persistence_name = self.__class__.__name__ + '/RepositioningData'
        self.persistence_file_name = AminerConfig.build_persistence_file_name(self.analysis_context.aminer_config, persistence_name)
        self.next_persist_time = time.time() + 600

        self.repositioning_data_dict = {}
        self.master_control_socket = None
        self.remote_control_socket = None

        # This dictionary provides a lookup list from file descriptor to associated object for handling the data to and from the
        # given descriptor. Currently supported handler objects are:
        # * Parent process socket
        # * Remote control listening socket
        # * LogStreams
        # * Remote control connections
        self.tracked_fds_dict = {}

        def graceful_shutdown_handler(_signo, _stack_frame):
            """Report the caught signal and clear the flag of the analysis loop."""
            msg = '%s: caught signal, shutting down' % program_name
            print(msg, file=sys.stderr)
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).info(msg)
            self.run_analysis_loop_flag = False

        # Override the signal handlers to allow graceful shutdown.
        for shutdown_signal in (signal.SIGHUP, signal.SIGINT, signal.SIGTERM):
            signal.signal(shutdown_signal, graceful_shutdown_handler)

        # Do this at the end of the initialization to avoid having partially initialized objects inside the registry.
        self.analysis_context.add_time_triggered_component(self)
Beispiel #17
0
def main():
    """
    Run the aminer main program.

    The function performs the following steps in order:
    * validate the program name and replace stdin by /dev/null,
    * parse the command line (optionally only checking for updates),
    * load the configuration and apply overrides given via --config-properties,
    * handle the persistence options --clear, --remove, --restore and --from-begin,
    * open all configured log resources and the optional remote control socket,
    * optionally daemonize, then fork and exec the analysis child,
    * forward log resource descriptors to the child and supervise it until it terminates.

    The function does not return normally: every path ends in sys.exit(), os._exit() or os.execv().
    """
    # Extract program name, but only when sure to contain no problematic characters.
    warnings.filterwarnings('ignore', category=ImportWarning)
    program_name = sys.argv[0].split('/')[-1]
    if (program_name == '.') or (program_name == '..') or (re.match(
            '^[a-zA-Z0-9._-]+$', program_name) is None):
        print('Invalid program name, check your execution args',
              file=sys.stderr)
        sys.exit(1)

    # We will not read stdin from here on, so get rid of it immediately, thus aberrant child cannot manipulate caller's stdin using it.
    stdin_fd = os.open('/dev/null', os.O_RDONLY)
    os.dup2(stdin_fd, 0)
    os.close(stdin_fd)

    help_message = 'aminer - logdata-anomaly-miner\n'
    if supports_color():
        help_message += colflame
    else:
        help_message += flame
    parser = argparse.ArgumentParser(
        description=help_message,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version=__version_string__)
    parser.add_argument('-u',
                        '--check-updates',
                        action='store_true',
                        help='check if updates for the aminer are available.')
    parser.add_argument('-c',
                        '--config',
                        default='/etc/aminer/config.yml',
                        type=str,
                        help='path to the config-file')
    # Note: store_false means args.daemon is True by default and becomes False when -D is passed. The value is used as the
    # "run in foreground" flag below, so passing -D triggers the daemonization.
    parser.add_argument('-D',
                        '--daemon',
                        action='store_false',
                        help='run as a daemon process')
    parser.add_argument(
        '-s',
        '--stat',
        choices=["0", "1", "2"],
        type=str,
        help=
        'set the stat level. Possible stat-levels are 0 for no statistics, 1 for normal statistic level and 2 for '
        'verbose statistics.')
    parser.add_argument(
        '-d',
        '--debug',
        choices=["0", "1", "2"],
        type=str,
        help=
        'set the debug level. Possible debug-levels are 0 for no debugging, 1 for normal output (INFO and above), 2 '
        'for printing all debug information.')
    parser.add_argument('--run-analysis',
                        action='store_true',
                        help='enable/disable analysis')
    parser.add_argument('-C',
                        '--clear',
                        action='store_true',
                        help='removes all persistence directories')
    parser.add_argument('-r',
                        '--remove',
                        action='append',
                        type=str,
                        help='removes a specific persistence directory')
    parser.add_argument('-R',
                        '--restore',
                        type=str,
                        help='restore a persistence backup')
    parser.add_argument(
        '-f',
        '--from-begin',
        action='store_true',
        help='removes RepositioningData before starting the aminer')
    parser.add_argument(
        '-o',
        '--offline-mode',
        action='store_true',
        help='stop the aminer after all logs have been processed.')
    parser.add_argument(
        "--config-properties",
        metavar="KEY=VALUE",
        nargs='+',
        help=
        "Set a number of config_properties by using key-value pairs (do not put spaces before or after the = sign). "
        "If a value contains spaces, you should define it with double quotes: 'foo=\"this is a sentence\". Note that "
        "values are always treated as strings. If values are already defined in the config_properties, the input "
        "types are converted to the ones already existing.")

    args = parser.parse_args()

    if args.check_updates:
        import urllib3
        url = 'https://raw.githubusercontent.com/ait-aecid/logdata-anomaly-miner/main/source/root/usr/lib/logdata-anomaly-miner/metadata.py'
        http = urllib3.PoolManager()
        r = http.request('GET', url, preload_content=True)
        metadata = r.data.decode()
        http.clear()
        lines = metadata.split('\n')
        curr_version = None
        for line in lines:
            if '__version__ = ' in line:
                curr_version = line.split('__version__ = ')[1].strip('"')
                break
        if __version__ == curr_version:
            print("The current aminer version %s is installed." % curr_version)
        else:
            print(
                "A new aminer version exists (%s). Currently version %s is installed."
                % (curr_version, __version__))
            print("Use git pull to update the aminer version.")
        sys.exit(0)

    config_file_name = args.config
    run_in_foreground_flag = args.daemon
    run_analysis_child_flag = args.run_analysis
    clear_persistence_flag = args.clear
    remove_persistence_dirs = args.remove
    from_begin_flag = args.from_begin
    global offline_mode  # skipcq: PYL-W0603
    offline_mode = args.offline_mode
    # Restore and remove names are joined to the persistence directory later on, so reject anything that could leave it.
    if args.restore is not None and ('.' in args.restore
                                     or '/' in args.restore):
        parser.error('The restore path %s must not contain any . or /' %
                     args.restore)
    if args.remove is not None:
        for remove in args.remove:
            if '.' in remove or '/' in remove:
                parser.error('The remove path %s must not contain any . or /' %
                             remove)
    restore_relative_persistence_path = args.restore
    stat_level = 1
    debug_level = 1
    stat_level_console_flag = False
    debug_level_console_flag = False
    if args.stat is not None:
        stat_level = int(args.stat)
        stat_level_console_flag = True
    if args.debug is not None:
        debug_level = int(args.debug)
        debug_level_console_flag = True

    # Load the main configuration file.
    if not os.path.exists(config_file_name):
        print('%s: config "%s" not (yet) available!' %
              (program_name, config_file_name),
              file=sys.stderr)
        sys.exit(1)

    # using the solution here to override config_properties:
    # https://stackoverflow.com/questions/27146262/create-variable-key-value-pairs-with-argparse-python
    use_temp_config = False
    config_properties = parse_vars(args.config_properties)
    if args.config_properties and "LearnMode" in config_properties:
        ymlext = [".YAML", ".YML", ".yaml", ".yml"]
        extension = os.path.splitext(config_file_name)[1]
        if extension in ymlext:
            # Write a temporary copy of the config in which every line mentioning LearnMode is replaced by the override.
            use_temp_config = True
            fd, temp_config = tempfile.mkstemp(suffix=".yml")
            with open(config_file_name) as f:
                for line in f:
                    if "LearnMode" in line:
                        # Terminate the replacement line, otherwise it would be merged with the following config line.
                        line = "LearnMode: %s\n" % config_properties["LearnMode"]
                    os.write(fd, line.encode())
            config_file_name = temp_config
            os.close(fd)
        else:
            msg = "The LearnMode parameter does not exist in .py configs!"
            print(msg, file=sys.stderr)
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
            sys.exit(1)

    # Minimal import to avoid loading too much within the privileged process.
    try:
        aminer_config = AminerConfig.load_config(config_file_name)
    except ValueError:
        sys.exit(1)
    finally:
        # Remove the temporary config also when loading failed and continue with the original config path.
        if use_temp_config:
            os.remove(config_file_name)
            config_file_name = args.config

    # Apply the command line overrides. Values arrive as strings and are converted to the type of the already configured value.
    for config_property in config_properties:
        if config_property == "LearnMode":
            continue
        old_value = aminer_config.config_properties.get(config_property)
        value = config_properties[config_property]
        if old_value is not None:
            try:
                # bool has to be checked before int, as bool is a subclass of int.
                if isinstance(old_value, bool):
                    if value == "True":
                        value = True
                    elif value == "False":
                        value = False
                    else:
                        msg = "The %s parameter must be of type %s!" % (
                            config_property, type(old_value))
                        print(msg, file=sys.stderr)
                        logging.getLogger(
                            AminerConfig.DEBUG_LOG_NAME).error(msg)
                        sys.exit(1)
                elif isinstance(old_value, int):
                    value = int(value)
                elif isinstance(old_value, float):
                    value = float(value)
                elif isinstance(old_value, list):
                    value = ast.literal_eval(value)
            except (ValueError, SyntaxError):
                # ast.literal_eval raises SyntaxError for malformed input, int() and float() raise ValueError.
                msg = "The %s parameter must be of type %s!" % (
                    config_property, type(old_value))
                print(msg, file=sys.stderr)
                logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
                sys.exit(1)
        else:
            msg = "The %s parameter is not set in the config. It will be treated as a string!" % config_property
            print("WARNING: " + msg, file=sys.stderr)
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).warning(msg)
        aminer_config.config_properties[config_property] = value

    persistence_dir = aminer_config.config_properties.get(
        AminerConfig.KEY_PERSISTENCE_DIR, AminerConfig.DEFAULT_PERSISTENCE_DIR)
    child_user_name = aminer_config.config_properties.get(
        AminerConfig.KEY_AMINER_USER)
    child_group_name = aminer_config.config_properties.get(
        AminerConfig.KEY_AMINER_GROUP)
    # -1 is kept when no user or group is configured; the privilege drop below is skipped for -1.
    child_user_id = -1
    child_group_id = -1
    try:
        if child_user_name is not None:
            child_user_id = getpwnam(child_user_name).pw_uid
        if child_group_name is not None:
            child_group_id = getgrnam(child_group_name).gr_gid
    except:  # skipcq: FLK-E722
        print('Failed to resolve %s or %s' %
              (AminerConfig.KEY_AMINER_USER, AminerConfig.KEY_AMINER_GROUP),
              file=sys.stderr)
        sys.exit(1)

    # Levels given on the command line take precedence over the levels from the configuration.
    if not stat_level_console_flag and AminerConfig.KEY_LOG_STAT_LEVEL in aminer_config.config_properties:
        stat_level = aminer_config.config_properties[
            AminerConfig.KEY_LOG_STAT_LEVEL]
    if not debug_level_console_flag and AminerConfig.KEY_LOG_DEBUG_LEVEL in aminer_config.config_properties:
        debug_level = aminer_config.config_properties[
            AminerConfig.KEY_LOG_DEBUG_LEVEL]
    if AminerConfig.CONFIG_KEY_ENCODING in aminer_config.config_properties:
        AminerConfig.ENCODING = aminer_config.config_properties[
            AminerConfig.CONFIG_KEY_ENCODING]

    AminerConfig.STAT_LEVEL = stat_level
    AminerConfig.DEBUG_LEVEL = debug_level

    initialize_loggers(aminer_config, child_user_id, child_group_id)

    if restore_relative_persistence_path is not None and (
            clear_persistence_flag or remove_persistence_dirs):
        msg = 'The --restore parameter removes all persistence files. Do not use this parameter with --Clear or --Remove!'
        print(msg, file=sys.stderr)
        logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
        sys.exit(1)

    if clear_persistence_flag:
        if remove_persistence_dirs:
            msg = 'The --clear and --remove arguments must not be used together!'
            print(msg, file=sys.stderr)
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
            sys.exit(1)
        clear_persistence(persistence_dir)

    if remove_persistence_dirs:
        # Only the explicitly named directories are removed. Wiping the whole persistence directory is the job of --clear.
        for filename in remove_persistence_dirs:
            file_path = os.path.join(persistence_dir, filename)
            try:
                if not os.path.exists(file_path):
                    continue
                if not os.path.isdir(file_path):
                    msg = 'The aminer persistence directory should not contain any files.'
                    print(msg, file=sys.stderr)
                    logging.getLogger(AminerConfig.DEBUG_LOG_NAME).warning(msg)
                    continue
                shutil.rmtree(file_path)
            except OSError as e:
                msg = 'Failed to delete %s. Reason: %s' % (file_path, e)
                print(msg, file=sys.stderr)
                logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)

    if restore_relative_persistence_path is not None:
        absolute_persistence_path = os.path.join(
            persistence_dir, 'backup', restore_relative_persistence_path)
        if not os.path.exists(absolute_persistence_path):
            msg = '%s does not exist. Continuing without restoring persistence.' % absolute_persistence_path
            print(msg, file=sys.stderr)
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).warning(msg)
        else:
            clear_persistence(persistence_dir)
            copytree(absolute_persistence_path, persistence_dir)
            persistence_dir_fd = SecureOSFunctions.secure_open_base_directory(
                persistence_dir, os.O_RDONLY | os.O_DIRECTORY | os.O_PATH)
            # Hand the restored directories and files over to the configured child user and group.
            for dirpath, _dirnames, filenames in os.walk(persistence_dir):
                os.chown(dirpath,
                         child_user_id,
                         child_group_id,
                         dir_fd=persistence_dir_fd,
                         follow_symlinks=False)
                for filename in filenames:
                    os.chown(os.path.join(dirpath, filename),
                             child_user_id,
                             child_group_id,
                             dir_fd=persistence_dir_fd,
                             follow_symlinks=False)

    if from_begin_flag:
        repositioning_data_path = os.path.join(
            aminer_config.config_properties.get(
                AminerConfig.KEY_PERSISTENCE_DIR,
                AminerConfig.DEFAULT_PERSISTENCE_DIR), 'AnalysisChild',
            'RepositioningData')
        if os.path.exists(repositioning_data_path):
            os.remove(repositioning_data_path)

    if run_analysis_child_flag:
        # Call analysis process, this function will never return.
        run_analysis_child(aminer_config, program_name)

    # Start importing of aminer specific components after reading of "config.py" to allow replacement of components via sys.path
    # from within configuration.
    log_sources_list = aminer_config.config_properties.get(
        AminerConfig.KEY_LOG_SOURCES_LIST)
    if (log_sources_list is None) or not log_sources_list:
        msg = '%s: %s not defined' % (program_name,
                                      AminerConfig.KEY_LOG_SOURCES_LIST)
        print(msg, file=sys.stderr)
        logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
        sys.exit(1)

    # Now create the management entries for each logfile.
    log_data_resource_dict = {}
    for log_resource_name in log_sources_list:
        # From here on log_resource_name is a byte array.
        log_resource_name = decode_string_as_byte_string(log_resource_name)
        log_resource = None
        if log_resource_name.startswith(b'file://'):
            log_resource = FileLogDataResource(log_resource_name, -1)
        elif log_resource_name.startswith(b'unix://'):
            log_resource = UnixSocketLogDataResource(log_resource_name, -1)
        else:
            msg = 'Unsupported schema in %s: %s' % (
                AminerConfig.KEY_LOG_SOURCES_LIST, repr(log_resource_name))
            print(msg, file=sys.stderr)
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
            sys.exit(1)
        # Both supported schema prefixes are 7 bytes long, the remainder is the path.
        if not os.path.exists(log_resource_name[7:].decode()):
            msg = "WARNING: file or socket '%s' does not exist (yet)!" % log_resource_name[
                7:].decode()
            print(msg, file=sys.stderr)
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).warning(msg)
        try:
            log_resource.open()
        except OSError as open_os_error:
            if open_os_error.errno == errno.EACCES:
                msg = '%s: no permission to access %s' % (
                    program_name, repr(log_resource_name))
                print(msg, file=sys.stderr)
                logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
                sys.exit(1)
            else:
                msg = '%s: unexpected error opening %s: %d (%s)' % (
                    program_name, repr(log_resource_name), open_os_error.errno,
                    os.strerror(open_os_error.errno))
                print(msg, file=sys.stderr)
                logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
                sys.exit(1)
        log_data_resource_dict[log_resource_name] = log_resource

    # Create the remote control socket, if any. Do this in privileged mode to allow binding it at arbitrary locations and support restricted
    # permissions of any type for current (privileged) uid.
    remote_control_socket_name = aminer_config.config_properties.get(
        AminerConfig.KEY_REMOTE_CONTROL_SOCKET_PATH, None)
    remote_control_socket = None
    if remote_control_socket_name is not None:
        if os.path.exists(remote_control_socket_name):
            try:
                os.unlink(remote_control_socket_name)
            except OSError:
                msg = 'Failed to clean up old remote control socket at %s' % remote_control_socket_name
                print(msg, file=sys.stderr)
                logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
                sys.exit(1)
        # Create the local socket: there is no easy way to create it with correct permissions, hence a fork is needed, setting umask,
        # bind the socket. It is also recommended to create the socket in a directory having the correct permissions already.
        remote_control_socket = socket.socket(socket.AF_UNIX,
                                              socket.SOCK_STREAM)
        remote_control_socket.setblocking(False)
        bind_child_pid = os.fork()
        if bind_child_pid == 0:
            os.umask(0o177)
            remote_control_socket.bind(remote_control_socket_name)
            # Do not perform any cleanup, flushing of streams. Use _exit(0) to avoid interference with fork.
            os._exit(0)  # skipcq: PYL-W0212
        os.waitpid(bind_child_pid, 0)
        remote_control_socket.listen(4)

    # Now have checked all we can get from the configuration in the privileged process. Detach from the TTY when in daemon mode.
    if not run_in_foreground_flag:
        child_pid = 0
        try:
            # Fork a child to make sure, we are not the process group leader already.
            child_pid = os.fork()
        except Exception as fork_exception:  # skipcq: PYL-W0703
            msg = 'Failed to daemonize: %s' % fork_exception
            print(msg, file=sys.stderr)
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
            sys.exit(1)
        if child_pid != 0:
            # This is the parent.
            os._exit(0)  # skipcq: PYL-W0212
        # This is the child. Create a new session and become process group leader. Here we get rid of the controlling tty.
        os.setsid()
        # Fork again to become an orphaned process not being session leader, hence not able to get a controlling tty again.
        try:
            child_pid = os.fork()
        except Exception as fork_exception:  # skipcq: PYL-W0703
            msg = 'Failed to daemonize: %s' % fork_exception
            print(msg, file=sys.stderr)
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
            sys.exit(1)
        if child_pid != 0:
            # This is the parent.
            os._exit(0)  # skipcq: PYL-W0212
        # Move to root directory to avoid lingering in some cwd someone else might want to unmount.
        os.chdir('/')
        # Change the umask here to clean all group/other mask bits so that accidentially created files are not accessible by other.
        os.umask(0o77)

    # Install a signal handler catching common stop signals and relaying it to all children for sure.
    # skipcq: PYL-W0603
    global child_termination_triggered_flag
    child_termination_triggered_flag = False

    def graceful_shutdown_handler(_signo, _stackFrame):
        """React on typical shutdown signals."""
        msg = '%s: caught signal, shutting down' % program_name
        print(msg, file=sys.stderr)
        logging.getLogger(AminerConfig.DEBUG_LOG_NAME).info(msg)
        # Just set the flag. It is likely, that child received same signal also so avoid multiple signaling, which could interrupt the
        # shutdown procedure again.
        # skipcq: PYL-W0603
        global child_termination_triggered_flag
        child_termination_triggered_flag = True

    import signal
    signal.signal(signal.SIGHUP, graceful_shutdown_handler)
    signal.signal(signal.SIGINT, graceful_shutdown_handler)
    signal.signal(signal.SIGTERM, graceful_shutdown_handler)

    # Now create the socket to connect the analysis child.
    (parent_socket, child_socket) = socket.socketpair(socket.AF_UNIX,
                                                      socket.SOCK_DGRAM, 0)
    # Have it nonblocking from here on.
    parent_socket.setblocking(False)
    child_socket.setblocking(False)

    # Use normal fork, we should have been detached from TTY already. Flush stderr to avoid duplication of output if both child and
    # parent want to write something.
    sys.stderr.flush()
    child_pid = os.fork()
    if child_pid == 0:
        # Relocate the child socket fd to 3 if needed
        if child_socket.fileno() != 3:
            os.dup2(child_socket.fileno(), 3)
            child_socket.close()

        # Clear the supplementary groups before dropping privileges. This makes only sense when changing the uid or gid.
        if os.getuid() == 0:
            if ((child_user_id != -1) and (child_user_id != os.getuid())) or (
                (child_group_id != -1) and (child_group_id != os.getgid())):
                os.setgroups([])

            # Drop privileges before executing child. setuid/gid will raise an exception when call has failed.
            # The group has to be changed first, as this is no longer permitted once the uid was dropped.
            if child_group_id != -1:
                os.setgid(child_group_id)
            if child_user_id != -1:
                os.setuid(child_user_id)
        else:
            msg = 'INFO: No privilege separation when started as unprivileged user'
            print(msg, file=sys.stderr)
            tmp_username = aminer_config.config_properties.get(
                AminerConfig.KEY_AMINER_USER)
            tmp_group = aminer_config.config_properties.get(
                AminerConfig.KEY_AMINER_GROUP)
            aminer_user_id = -1
            aminer_group_id = -1
            try:
                if tmp_username is not None:
                    aminer_user_id = getpwnam(tmp_username).pw_uid
                if tmp_group is not None:
                    aminer_group_id = getgrnam(tmp_group).gr_gid
            except:  # skipcq: FLK-E722
                print('Failed to resolve %s or %s' %
                      (AminerConfig.KEY_AMINER_USER,
                       AminerConfig.KEY_AMINER_GROUP),
                      file=sys.stderr)
                sys.exit(1)

            initialize_loggers(aminer_config, aminer_user_id, aminer_group_id)
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).info(msg)

        # Now resolve the specific analysis configuration file (if any).
        analysis_config_file_name = aminer_config.config_properties.get(
            AminerConfig.KEY_ANALYSIS_CONFIG_FILE, None)
        if analysis_config_file_name is None:
            analysis_config_file_name = config_file_name
        elif not os.path.isabs(analysis_config_file_name):
            analysis_config_file_name = os.path.join(
                os.path.dirname(config_file_name), analysis_config_file_name)

        # This is the child. Close all parent file descriptors, we do not need. Perhaps this could be done more elegantly.
        # Descriptors 0-2 are the standard streams and 3 is the socket to the parent, so closing starts at 4.
        for close_fd in range(4, 1 << 16):
            try:
                os.close(close_fd)
            except OSError as open_os_error:
                if open_os_error.errno == errno.EBADF:
                    continue
                msg = '%s: unexpected exception closing file descriptors: %s' % (
                    program_name, open_os_error)
                print(msg, file=sys.stderr)
                logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
                # Flush stderr before exit without any cleanup.
                sys.stderr.flush()
                os._exit(1)  # skipcq: PYL-W0212

        # Now execute the very same program again, but user might have moved or renamed it meanwhile. This would be problematic with
        # SUID-binaries (which we do not yet support). Do NOT just fork but also exec to avoid child circumventing
        # parent's ALSR due to cloned kernel VMA.
        exec_args = [
            'aminerChild', '--run-analysis', '--config',
            analysis_config_file_name, '--stat',
            str(stat_level), '--debug',
            str(debug_level)
        ]
        if offline_mode:
            exec_args.append("--offline-mode")
        if args.config_properties:
            exec_args.append("--config-properties")
            for config_property in args.config_properties:
                exec_args.append(config_property)
        os.execv(sys.argv[0], exec_args)  # skipcq: BAN-B606
        msg = 'Failed to execute child process'
        print(msg, file=sys.stderr)
        logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
        sys.stderr.flush()
        os._exit(1)  # skipcq: PYL-W0212
    child_socket.close()

    # Send all log resource information currently available to child process.
    for log_resource_name, log_resource in log_data_resource_dict.items():
        if (log_resource
                is not None) and (log_resource.get_file_descriptor() >= 0):
            SecureOSFunctions.send_logstream_descriptor(
                parent_socket, log_resource.get_file_descriptor(),
                log_resource_name)
            log_resource.close()

    # Send the remote control server socket, if any and close it afterwards. It is not needed any more on parent side.
    if remote_control_socket is not None:
        SecureOSFunctions.send_annotated_file_descriptor(
            parent_socket, remote_control_socket.fileno(), 'remotecontrol', '')
        remote_control_socket.close()

    exit_status = 0
    child_termination_triggered_count = 0
    while True:
        if child_termination_triggered_flag:
            # Escalate step by step: wait one round, then send SIGTERM to the child on the following rounds and finally
            # send SIGKILL to the whole process group.
            if child_termination_triggered_count == 0:
                time.sleep(1)
            elif child_termination_triggered_count < 5:
                os.kill(child_pid, signal.SIGTERM)
            else:
                os.kill(0, signal.SIGKILL)
            child_termination_triggered_count += 1
        (sig_child_pid, sig_status) = os.waitpid(-1, os.WNOHANG)
        if sig_child_pid != 0:
            if sig_child_pid == child_pid:
                if child_termination_triggered_flag or offline_mode:
                    # This was expected, just terminate.
                    break
                msg = '%s: Analysis child process %d terminated unexpectedly with signal 0x%x' % (
                    program_name, sig_child_pid, sig_status)
                print(msg, file=sys.stderr)
                logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
                exit_status = 1
                break
            # So the child has been cloned, the clone has terminated. This should not happen either.
            msg = '%s: untracked child %d terminated with with signal 0x%x' % (
                program_name, sig_child_pid, sig_status)
            print(msg, file=sys.stderr)
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
            exit_status = 1

        # Child information handled, scan for rotated logfiles or other resources, where reopening might make sense.
        for log_resouce_name, log_data_resource in log_data_resource_dict.items(
        ):
            try:
                if not log_data_resource.open(reopen_flag=True):
                    continue
            except OSError as open_os_error:
                if open_os_error.errno == errno.EACCES:
                    msg = '%s: no permission to access %s' % (program_name,
                                                              log_resouce_name)
                    print(msg, file=sys.stderr)
                    logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
                else:
                    msg = '%s: unexpected error reopening %s: %d (%s)' % (
                        program_name, log_resouce_name, open_os_error.errno,
                        os.strerror(open_os_error.errno))
                    print(msg, file=sys.stderr)
                    logging.getLogger(AminerConfig.DEBUG_LOG_NAME).error(msg)
                exit_status = 2
                continue
            SecureOSFunctions.send_logstream_descriptor(
                parent_socket, log_data_resource.get_file_descriptor(),
                log_resouce_name)
            log_data_resource.close()
        time.sleep(1)
    parent_socket.close()
    SecureOSFunctions.close_base_directory()
    SecureOSFunctions.close_log_directory()
    sys.exit(exit_status)
    def __init__(self,
                 aminer_config,
                 target_path_list,
                 anomaly_event_handlers,
                 window_size,
                 min_anomaly_score,
                 min_variance,
                 num_windows,
                 persistence_id='Default',
                 auto_include_flag=False,
                 output_log_line=True,
                 ignore_list=None,
                 constraint_list=None):
        """
        Set up the detector state and restore a previously persisted event count matrix, if one exists.
        @param aminer_config configuration from analysis_context.
        @param target_path_list parser paths of values to be analyzed. Multiple paths mean that values are analyzed as separate
        dimensions. When no paths are specified (None or empty), the events given by the full path list are analyzed (one dimension).
        @param anomaly_event_handlers for handling events, e.g., print events to stdout.
        @param window_size the length of the time window for counting in seconds.
        @param min_anomaly_score the minimum computed outlier score for reporting anomalies. Scores are scaled by training data, i.e.,
        reasonable minimum scores are >1 to detect outliers with respect to currently trained PCA matrix.
        @param min_variance the minimum variance covered by the principal components in range [0, 1].
        @param num_windows the number of time windows in the sliding window approach. Total covered time span = window_size * num_windows.
        Values below 3 are replaced by 3 and a warning is written to the debug log.
        @param persistence_id name of persistency document.
        @param auto_include_flag specifies whether new count measurements are added to the PCA count matrix.
        @param output_log_line specifies whether the full parsed log atom should be provided in the output.
        @param ignore_list list of paths that are not considered for analysis, i.e., events that contain one of these paths are
        omitted. None is replaced by an empty list.
        @param constraint_list list of paths that have to be present in the log atom to be analyzed. None is replaced by an empty list.
        """
        # Values taken over from the arguments.
        self.aminer_config = aminer_config
        self.persistence_id = persistence_id
        self.target_path_list = target_path_list
        self.anomaly_event_handlers = anomaly_event_handlers
        self.auto_include_flag = auto_include_flag
        self.output_log_line = output_log_line
        self.block_time = window_size
        self.anomaly_score_threshold = min_anomaly_score
        self.variance_threshold = min_variance
        self.constraint_list = [] if constraint_list is None else constraint_list
        self.ignore_list = [] if ignore_list is None else ignore_list

        # At least three windows are enforced; smaller settings are corrected and reported.
        too_few_windows = num_windows < 3
        if too_few_windows:
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).warning(
                'num_windows must be >= 3!')
        self.num_windows = 3 if too_few_windows else num_windows

        # Runtime state and statistics counters.
        self.next_persist_time = None
        self.first_log = True
        self.start_time = 0
        self.event_count_matrix = []
        self.feature_list = []
        self.ecm = None
        self.log_total = 0
        self.log_success = 0
        self.log_windows = 0

        # Register for persistence before trying to read the stored matrix.
        self.persistence_file_name = AminerConfig.build_persistence_file_name(
            aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        stored_matrix = PersistenceUtil.load_json(self.persistence_file_name)

        if stored_matrix is None:
            no_target_paths = self.target_path_list is None or len(self.target_path_list) == 0
            # Only one dimension when events are used instead of values; the empty string serves as placeholder key.
            self.event_count_vector = {'': {}} if no_target_paths else {}
            return

        self.event_count_matrix = list(stored_matrix)
        self.compute_pca()
        # Take the first stored row as template for the count vector, then reset the counts of each feature.
        self.event_count_vector = copy.deepcopy(self.event_count_matrix[0])
        self.reset_event_count_vector()
    def __init__(self, aminer_config, anomaly_event_handlers, event_type_detector, persistence_id='Default', target_path_list=None,
                 output_log_line=True, auto_include_flag=False, num_init=50, force_period_length=False, set_period_length=10, alpha=0.05,
                 alpha_bt=0.05, num_results_bt=15, num_min_time_history=20, num_max_time_history=30, num_periods_tsa_ini=20):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        Settings of the passed event_type_detector (save_values, min_num_vals, max_num_vals) are raised when they are too small for
        this detector; every such change is written to the debug log and to stderr.
        @param aminer_config configuration from analysis_context.
        @param anomaly_event_handlers for handling events, e.g., print events to stdout.
        @param event_type_detector used to track the number of events in the time windows.
        @param persistence_id name of persistency document.
        @param target_path_list parser paths of values to be analyzed. Multiple paths mean that values are analyzed by their combined
        occurrences. When no paths are specified, the events given by the full path list are analyzed. None is replaced by an empty list.
        @param output_log_line specifies whether the full parsed log atom should be provided in the output.
        @param auto_include_flag specifies whether new frequency measurements override ground truth frequencies.
        @param num_init number of lines processed before the period length is calculated.
        @param force_period_length states if the period length is calculated through the ACF, or if the period length is forced to
        be set to set_period_length.
        @param set_period_length states how long the period length is if force_period_length is set to True.
        @param alpha significance level of the estimated values.
        @param alpha_bt significance level for the binomial test (bt).
        @param num_results_bt number of results which are used in the binomial test.
        @param num_min_time_history number of lines processed before the period length is calculated.
        NOTE(review): this wording is identical to num_init - confirm the intended meaning.
        @param num_max_time_history maximum number of values of the time_history.
        @param num_periods_tsa_ini number of periods used to initialize the Arima-model.
        """
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        self.event_type_detector = event_type_detector
        # Add the PathArimaDetector to the list of the modules, which use the event_type_detector.
        self.event_type_detector.add_following_modules(self)
        self.persistence_id = persistence_id
        self.target_path_list = target_path_list
        if self.target_path_list is None:
            self.target_path_list = []
        self.output_log_line = output_log_line
        self.auto_include_flag = auto_include_flag
        self.num_init = num_init
        self.force_period_length = force_period_length
        self.set_period_length = set_period_length
        self.alpha = alpha
        self.alpha_bt = alpha_bt
        self.num_results_bt = num_results_bt
        self.num_min_time_history = num_min_time_history
        self.num_max_time_history = num_max_time_history
        self.num_periods_tsa_ini = num_periods_tsa_ini

        # Test if the ETD saves the values
        if not self.event_type_detector.save_values:
            # save_values is an attribute of the event type detector, hence the message names the ETD.
            msg = 'Changed the parameter save_values of the ETD from False to True to properly use the PathArimaDetector'
            logging.getLogger(DEBUG_LOG_NAME).warning(msg)
            print('WARNING: ' + msg, file=sys.stderr)
            self.event_type_detector.save_values = True

        # Number of values the ETD has to keep so that the Arima-model can be initialized.
        required_min_vals = self.num_periods_tsa_ini * int(self.num_init/2)
        # Upper bound used for the ETD value list; 500 values are added on top of the minimum.
        required_max_vals = required_min_vals + 500

        # Test if the ETD saves enough values
        if self.event_type_detector.min_num_vals < required_min_vals:
            msg = 'Changed the parameter min_num_vals of the ETD from %s to %s to properly use the PathArimaDetector' % (
                    self.event_type_detector.min_num_vals, required_min_vals)
            logging.getLogger(DEBUG_LOG_NAME).warning(msg)
            print('WARNING: ' + msg, file=sys.stderr)
            self.event_type_detector.min_num_vals = required_min_vals

        # Test if the ETD is allowed to store enough values before reducing its value list
        if self.event_type_detector.max_num_vals < required_max_vals:
            msg = 'Changed the parameter max_num_vals of the ETD from %s to %s to use pregenerated critical values for the gof-test' % (
                    self.event_type_detector.max_num_vals, required_max_vals)
            logging.getLogger(DEBUG_LOG_NAME).warning(msg)
            print('WARNING: ' + msg, file=sys.stderr)
            self.event_type_detector.max_num_vals = required_max_vals

        self.next_persist_time = time.time() + self.aminer_config.config_properties.get(KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
        # List of the indices of the target_paths in the ETD
        self.target_path_index_list = []
        # List of the period_lengths
        self.period_length_list = []
        # List of the single arima_models (statsmodels)
        self.arima_models = []
        # List of the observed values and the predictions of the TSAArima
        self.prediction_history = []
        # List of the results stating whether the value was within the limits of the one step predictions
        self.result_list = []
        # Minimal number of successes for the binomial test in the last num_results_bt results
        self.bt_min_suc = self.bt_min_successes(self.num_results_bt, self.alpha, self.alpha_bt)

        # Loads the persistence
        self.persistence_file_name = AminerConfig.build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        self.load_persistence_data()
    def __init__(self,
                 aminer_config,
                 anomaly_event_handlers,
                 persistence_id='Default',
                 path_list=None,
                 min_num_vals=1000,
                 max_num_vals=1500,
                 save_values=True,
                 track_time_for_TSA=False,
                 waiting_time_for_TSA=300,
                 num_sections_waiting_time_for_TSA=10):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        @param aminer_config configuration from analysis_context.
        @param anomaly_event_handlers for handling events, e.g., print events to stdout.
        @param persistence_id name of persistency document.
        @param path_list list of paths whose variables are tracked. All other paths are not tracked. If None, all paths are tracked.
        @param min_num_vals number of values the value list is reduced to once it has grown to max_num_vals.
        @param max_num_vals maximum number of lines in the value list before it is reduced. Should be > min_num_vals.
        @param save_values if False, the values of the tokens are not saved for further analysis (disables self.values and
        self.check_variables).
        @param track_time_for_TSA states if the time windows should be tracked for the time series analysis.
        @param waiting_time_for_TSA time in seconds until the time windows are being initialized.
        @param num_sections_waiting_time_for_TSA number of subdivisions of the initialization window.
        """
        self.aminer_config = aminer_config
        self.anomaly_event_handlers = anomaly_event_handlers
        # The next persisting is scheduled 600 seconds after creation.
        self.next_persist_time = time.time() + 600.0

        # --- Settings taken from the arguments ---
        # Paths whose variables are tracked; None means that all paths are tracked.
        self.path_list = path_list
        # Size the value list is reduced to. Be cautious that this is higher than 'num_min_values' in VarTypeD/Cor!!!
        self.min_num_vals = min_num_vals
        # Size of the value list at which the reduction is triggered. > min_num_vals.
        self.max_num_vals = max_num_vals
        # If False the values of the tokens are not saved for further analysis. Disables self.values and self.check_variables.
        self.save_values = save_values
        # States if the time windows should be tracked for the time series analysis.
        self.track_time_for_TSA = track_time_for_TSA
        # Time in seconds until the time windows are being initialized.
        self.waiting_time_for_TSA = waiting_time_for_TSA
        # Number of subdivisions of the initialization window. The length of the input list of the function_Init function is numSubd+1.
        self.num_sections_waiting_time_for_TSA = num_sections_waiting_time_for_TSA

        # --- State collected while processing log lines ---
        # Number of known event types.
        self.num_events = 0
        # Total number of processed log lines.
        self.total_records = 0
        # Index of the event type of the current log line.
        self.current_index = 0
        # Longest path of each event.
        self.longest_path = []
        # Keys corresponding to the events.
        self.found_keys = []
        # Keys which take values in the log line.
        self.variable_key_list = []
        # Values of the log lines. When the length reaches max_num_vals the list is reduced to min_num_vals values per variable.
        self.values = []
        # Number of lines per event type.
        self.num_eventlines = []
        # Modules which follow the event_type_detector. The implemented modules are from the list
        # [variableTypeDetector, variableCorrelationDetector]
        self.following_modules = []
        # Bools stating whether the variables of variable_key_list are updated.
        self.check_variables = []
        # Time triggers. The first list holds the times when something should be triggered, the second list holds the indices
        # of the event types, or a list of the event type, a path and a value which should be counted (-1 for an initialization),
        # the third list holds the length of the time window (-1 for a one time trigger).
        self.etd_time_trigger = [[], [], []]
        # Reference containing the number of lines of the events for the TSA.
        self.num_eventlines_TSA_ref = []

        # Register for persistence and read the stored state.
        self.persistence_file_name = AminerConfig.build_persistence_file_name(
            aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        stored_state = PersistenceUtil.load_json(self.persistence_file_name)

        if stored_state is None:
            # Nothing persisted yet: with TSA time tracking enabled, add an initialization trigger (-1 in all three lists).
            if self.track_time_for_TSA:
                for trigger_part in self.etd_time_trigger:
                    trigger_part.append(-1)
            return

        # The event keys are converted to sets when they are restored.
        self.found_keys.extend(set(stored_key) for stored_key in stored_state[0])
        self.variable_key_list = stored_state[1]
        self.values = stored_state[2]
        self.longest_path = stored_state[3]
        self.check_variables = stored_state[4]
        self.num_eventlines = stored_state[5]
        self.etd_time_trigger = stored_state[6]
        self.num_eventlines_TSA_ref = stored_state[7]

        self.num_events = len(self.found_keys)
Beispiel #21
0
    def __init__(self,
                 aminer_config,
                 anomaly_event_handlers,
                 event_type_detector,
                 acf_pause_interval_percentage=0.2,
                 acf_auto_pause_interval=True,
                 acf_auto_pause_interval_num_min=10,
                 build_sum_over_values=False,
                 num_periods_tsa_ini=15,
                 num_division_time_step=10,
                 alpha=0.05,
                 num_min_time_history=20,
                 num_max_time_history=30,
                 num_results_bt=15,
                 alpha_bt=0.05,
                 acf_threshold=0.2,
                 round_time_inteval_threshold=0.02,
                 force_period_length=False,
                 set_period_length=604800,
                 min_log_lines_per_time_step=10,
                 persistence_id='Default',
                 path_list=None,
                 ignore_list=None,
                 output_log_line=True,
                 auto_include_flag=True):
        """
        Initialize the detector. This will also trigger reading or creation of persistence storage location.
        When persisted time window histories exist, an Arima-model is fitted again for every event type with enough history;
        histories that are too short or for which the fit fails are reset to an empty list.
        @param aminer_config configuration from analysis_context.
        @param anomaly_event_handlers for handling events, e.g., print events to stdout.
        @param event_type_detector used to track the number of events in the time windows.
        @param acf_pause_interval_percentage states which area of the results of the ACF are not used to find the highest peak.
        @param acf_auto_pause_interval states if the pause area is automatically set.
        If enabled, the variable acf_pause_interval_percentage loses its functionality.
        @param acf_auto_pause_interval_num_min states the number of values in which a local minimum must be the minimum, to be
        considered a local minimum of the function and not an outlier.
        @param build_sum_over_values states if the sum of a series of counts is built before applying the TSA.
        @param num_periods_tsa_ini number of periods used to initialize the Arima-model.
        @param num_division_time_step number of division of the time window to calculate the time step.
        @param alpha significance level of the estimated values.
        @param num_min_time_history number of lines processed before the period length is calculated.
        @param num_max_time_history maximum number of values of the time_history.
        @param num_results_bt number of results which are used in the binomial test.
        @param alpha_bt significance level for the binomial test (bt).
        @param round_time_inteval_threshold threshold for the rounding of the time_steps to the times in self.assumed_time_steps.
        The higher the threshold the easier the time is rounded to the next time in the list.
        @param acf_threshold threshold, which has to be exceeded by the highest peak of the ACF of the time series, to be analysed.
        NOTE(review): the previous documentation said "cdf function" here; presumably the ACF is meant - confirm.
        @param force_period_length states if the period length is calculated through the ACF, or if the period length is forced to
        be set to set_period_length.
        @param set_period_length states how long the period length is if force_period_length is set to True.
        @param min_log_lines_per_time_step states the minimal average number of log lines per time step to make a TSA.
        @param persistence_id name of persistency document.
        @param path_list At least one of the parser paths in this list needs to appear in the event to be analysed.
        None is replaced by an empty list.
        @param ignore_list list of paths that are not considered for correlation, i.e., events that contain one of these paths are
        omitted. None is replaced by an empty list.
        @param output_log_line specifies whether the full parsed log atom should be provided in the output.
        @param auto_include_flag specifies whether new frequency measurements override ground truth frequencies.
        """
        self.aminer_config = aminer_config
        self.next_persist_time = time.time() + self.aminer_config.config_properties.get(
            KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
        self.anomaly_event_handlers = anomaly_event_handlers
        self.output_log_line = output_log_line
        self.auto_include_flag = auto_include_flag
        self.path_list = path_list
        if self.path_list is None:
            self.path_list = []
        self.ignore_list = ignore_list
        if self.ignore_list is None:
            self.ignore_list = []

        self.event_type_detector = event_type_detector
        self.acf_pause_interval_percentage = acf_pause_interval_percentage
        self.acf_auto_pause_interval = acf_auto_pause_interval
        self.acf_auto_pause_interval_num_min = acf_auto_pause_interval_num_min
        self.build_sum_over_values = build_sum_over_values
        self.num_periods_tsa_ini = num_periods_tsa_ini
        self.num_division_time_step = num_division_time_step
        self.alpha = alpha
        self.num_min_time_history = num_min_time_history
        self.num_max_time_history = num_max_time_history
        self.num_results_bt = num_results_bt
        self.alpha_bt = alpha_bt
        self.round_time_inteval_threshold = round_time_inteval_threshold
        self.acf_threshold = acf_threshold
        self.force_period_length = force_period_length
        self.set_period_length = set_period_length
        self.min_log_lines_per_time_step = min_log_lines_per_time_step

        # Add the TSAArimaDetector-module to the list of the modules, which use the event_type_detector.
        self.event_type_detector.add_following_modules(self)

        # History of the time windows
        self.time_window_history = []
        # List of the single arima_models (statsmodels)
        self.arima_models = []
        # List of the observed values and the predictions of the TSAArima
        self.prediction_history = []
        # List of the times of the observations
        self.time_history = []
        # List of the results stating whether the value was within the limits of the one step predictions
        self.result_list = []
        # Minimal number of successes for the binomial test
        self.bt_min_suc = self.bt_min_successes(self.num_results_bt,
                                                self.alpha, self.alpha_bt)
        # Assumed occurring time steps in seconds. 1 minute: 60, 1 hour: 3600, 12 hours: 43200, 1 day: 86400, 1 week: 604800.
        self.assumed_time_steps = [60, 3600, 43200, 86400, 604800]

        # Load the persistence
        self.persistence_id = persistence_id
        self.persistence_file_name = AminerConfig.build_persistence_file_name(
            aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)
        persistence_data = PersistenceUtil.load_json(
            self.persistence_file_name)

        # Import the persistence
        if persistence_data is not None:
            self.time_window_history = persistence_data[0]

            self.arima_models = [None for _ in self.time_window_history]
            # Number of most recent counts used to initialize a model.
            num_init_values = self.num_periods_tsa_ini * self.num_division_time_step
            for event_index, history in enumerate(self.time_window_history):
                if len(history) < num_init_values:
                    # Not enough history for this event type: no model (entry stays None) and the history is reset.
                    self.time_window_history[event_index] = []
                    continue
                try:
                    if not self.build_sum_over_values:
                        training_values = history[-num_init_values:]
                    else:
                        # Sums over sliding windows of num_division_time_step counts. The last window is added separately,
                        # because its end index would be 0, which would produce an empty slice.
                        training_values = [
                            sum(history[-num_init_values + i:
                                        -(self.num_periods_tsa_ini - 1) * self.num_division_time_step + i])
                            for i in range((self.num_periods_tsa_ini - 1) * self.num_division_time_step)
                        ] + [sum(history[-self.num_division_time_step:])]
                    model = statsmodels.tsa.arima.model.ARIMA(
                        training_values,
                        order=(self.num_division_time_step, 0, 0),
                        seasonal_order=(0, 0, 0, self.num_division_time_step))
                    self.arima_models[event_index] = model.fit()
                except Exception:
                    # Best effort: any failure while building or fitting the model discards the model and the history of
                    # this event type. Only Exception is caught so that KeyboardInterrupt and SystemExit still propagate.
                    self.arima_models[event_index] = None
                    self.time_window_history[event_index] = []

            self.prediction_history = persistence_data[1]
            self.time_history = persistence_data[2]
            self.result_list = persistence_data[3]

            # List of the pauses of the tests to the event numbers. If an arima model was initialized with the persistency, the model must
            # be trained before it can be used for forecasts. An integer states how many tests should be skipped before the next
            # output to this event number. None if no model was initialized for this event number.
            self.test_pause = [
                self.num_division_time_step
                if arima_models_statsmodel is not None else None
                for arima_models_statsmodel in self.arima_models
            ]
            # If all entries are None set the variable to None
            if all(entry is None for entry in self.test_pause):
                self.test_pause = None
        else:
            self.test_pause = None
Beispiel #22
0
    def __init__(self,
                 aminer_config,
                 anomaly_event_handlers,
                 target_path_list=None,
                 window_size=600,
                 confidence_factor=0.5,
                 persistence_id='Default',
                 auto_include_flag=False,
                 output_log_line=True,
                 ignore_list=None,
                 constraint_list=None):
        """
        Set up the detector state and restore previously persisted event frequencies, if any exist.
        @param aminer_config configuration from analysis_context.
        @param anomaly_event_handlers for handling events, e.g., print events to stdout.
        @param target_path_list parser paths of values to be analyzed. Multiple paths mean that values are analyzed by their combined
        occurrences. When no paths are specified, the events given by the full path list are analyzed.
        @param window_size the length of the time window for counting in seconds.
        @param confidence_factor defines range of tolerable deviation of measured frequency from ground truth frequency gf by
        [gf * confidence_factor, gf / confidence_factor]. confidence_factor must be in range [0, 1]; any other value is replaced
        by 1 and a warning is written to the debug log.
        @param persistence_id name of persistency document.
        @param auto_include_flag specifies whether new frequency measurements override ground truth frequencies.
        @param output_log_line specifies whether the full parsed log atom should be provided in the output.
        @param ignore_list list of paths that are not considered for analysis, i.e., events that contain one of these paths are
        omitted. None is replaced by an empty list.
        @param constraint_list list of paths that have to be present in the log atom to be analyzed. None is replaced by an empty list.
        """
        # Values taken over from the arguments.
        self.aminer_config = aminer_config
        self.persistence_id = persistence_id
        self.target_path_list = target_path_list
        self.anomaly_event_handlers = anomaly_event_handlers
        self.auto_include_flag = auto_include_flag
        self.output_log_line = output_log_line
        self.window_size = window_size
        self.constraint_list = [] if constraint_list is None else constraint_list
        self.ignore_list = [] if ignore_list is None else ignore_list

        # A confidence factor outside of [0, 1] is reported and replaced by 1.
        factor_in_range = 0 <= confidence_factor <= 1
        if not factor_in_range:
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).warning(
                'confidence_factor must be in the range [0,1]!')
        self.confidence_factor = confidence_factor if factor_in_range else 1

        # Runtime state and statistics counters.
        self.next_persist_time = None
        self.next_check_time = None
        self.counts = {}
        self.counts_prev = {}
        self.log_total = 0
        self.log_success = 0
        self.log_windows = 0

        # Register for persistence before trying to read the stored frequencies.
        self.persistence_file_name = AminerConfig.build_persistence_file_name(
            aminer_config, self.__class__.__name__, persistence_id)
        PersistenceUtil.add_persistable_component(self)

        # Persisted data contains lists of event-frequency pairs, i.e., [[<ev1, ev2>, <freq>], [<ev1, ev2>, <freq>], ...]
        stored_pairs = PersistenceUtil.load_json(self.persistence_file_name)
        if stored_pairs is not None:
            for stored_pair in stored_pairs:
                event_key, stored_frequency = stored_pair[0], stored_pair[1]
                # The events are converted to tuples so that they can serve as dictionary keys.
                self.counts_prev[tuple(event_key)] = stored_frequency
            logging.getLogger(AminerConfig.DEBUG_LOG_NAME).debug(
                '%s loaded persistence data.', self.__class__.__name__)