def __init__(self, aminer_config, target_path_list, anomaly_event_handlers, persistence_id='Default', allow_missing_values_flag=False,
             auto_include_flag=False, output_log_line=True):
    """
    Initialize the detector and trigger loading (or creation) of its persistence state.

    @param target_path_list the list of values to extract from each match to create the value combination to be checked.
    @param allow_missing_values_flag when set to True, the detector will also use matches where one of the paths from
        target_path_list does not refer to an existing parsed data object.
    @param auto_include_flag when set to True, this detector will report a new value only the first time before including
        it in the known values set automatically.
    """
    # Configuration and output behavior.
    self.aminer_config = aminer_config
    self.target_path_list = target_path_list
    self.anomaly_event_handlers = anomaly_event_handlers
    self.allow_missing_values_flag = allow_missing_values_flag
    self.auto_include_flag = auto_include_flag
    self.output_log_line = output_log_line
    self.persistence_id = persistence_id
    # Statistics counters used for periodic status logging.
    self.log_success = 0
    self.log_total = 0
    self.log_learned_path_value_combos = 0
    self.log_new_learned_values = []
    # Persistence: load known values immediately; persisting is scheduled lazily (next_persist_time stays None until needed).
    self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
    self.next_persist_time = None
    self.known_values_set = set()
    self.load_persistence_data()
    PersistenceUtil.add_persistable_component(self)
def __init__(self, aminer_config, target_path_list, anomaly_event_handlers, persistence_id='Default', auto_include_flag=False,
             output_log_line=True):
    """Initialize the detector. This will also trigger reading or creation of persistence storage location."""
    self.aminer_config = aminer_config
    self.target_path_list = target_path_list
    self.anomaly_event_handlers = anomaly_event_handlers
    self.auto_include_flag = auto_include_flag
    self.output_log_line = output_log_line
    self.persistence_id = persistence_id
    # Schedule the first periodic persist relative to the current wall clock.
    self.next_persist_time = time.time() + self.aminer_config.config_properties.get(KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
    # Statistics counters used for periodic status logging.
    self.log_success = 0
    self.log_total = 0
    self.log_learned_path_values = 0
    self.log_new_learned_values = []
    self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
    PersistenceUtil.add_persistable_component(self)
    # Restore previously learned values, if any were persisted.
    persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
    if persistence_data is not None:
        self.known_values_set = set(persistence_data)
        logging.getLogger(DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
    else:
        self.known_values_set = set()
def __init__(self, aminer_config, property_path, bin_definition, report_interval, report_event_handlers, reset_after_report_flag=True,
             persistence_id='Default', output_log_line=True):
    """
    Initialize the analysis component.

    @param report_interval delay in seconds between creation of two reports. The parameter is applied to the parsed record
        data time, not the system time, so reports can be delayed when no data is received.
    """
    # Reporting schedule and histogram state.
    self.last_report_time = None
    self.next_report_time = 0.0
    self.property_path = property_path
    self.bin_definition = bin_definition
    self.histogram_data = {}
    self.report_interval = report_interval
    self.report_event_handlers = report_event_handlers
    self.reset_after_report_flag = reset_after_report_flag
    self.persistence_id = persistence_id
    self.next_persist_time = None
    self.output_log_line = output_log_line
    self.aminer_config = aminer_config
    self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
    PersistenceUtil.add_persistable_component(self)
    # Restoring persisted histogram state is not implemented yet; fail loudly rather than silently ignoring data on disk.
    persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
    if persistence_data is not None:
        msg = 'No data reading, def merge yet'
        logging.getLogger(DEBUG_LOG_NAME).error(msg)
        raise Exception(msg)
def tearDown(self):
    """
    Delete all persisted data after the tests.

    Removes the persistence directory and recreates it empty, then closes the secured base directory handle.
    """
    self.aminer_config = load_config(self.__configFilePath)
    persistence_file_name = build_persistence_file_name(self.aminer_config)
    if os.path.exists(persistence_file_name):
        shutil.rmtree(persistence_file_name)
    # The original re-checked os.path.exists() right after rmtree(), which is always true at that point;
    # makedirs(exist_ok=True) recreates the directory in one race-free call.
    os.makedirs(persistence_file_name, exist_ok=True)
    SecureOSFunctions.close_base_directory()
def __init__(self, aminer_config, target_path_list, anomaly_event_handlers, persistence_id='Default', auto_include_flag=False,
             default_interval=3600, realert_interval=86400, output_log_line=True):
    """
    Initialize the detector. This will also trigger reading or creation of persistence storage location.

    @param target_path_list to extract a source identification value from each logatom.
    """
    self.target_path_list = target_path_list
    self.anomaly_event_handlers = anomaly_event_handlers
    self.auto_include_flag = auto_include_flag
    self.default_interval = default_interval
    self.realert_interval = realert_interval
    # These timestamps are compared with timestamp values from log atoms for activation of alerting logic. The first
    # timestamp from logs above this value will trigger alerting.
    self.next_check_timestamp = 0
    self.last_seen_timestamp = 0
    self.output_log_line = output_log_line
    self.aminer_config = aminer_config
    self.next_persist_time = time.time() + self.aminer_config.config_properties.get(KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
    self.persistence_id = persistence_id
    # Statistics counters used for periodic status logging.
    self.log_success = 0
    self.log_total = 0
    self.log_learned_values = 0
    self.log_new_learned_values = []
    self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
    PersistenceUtil.add_persistable_component(self)
    persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
    self.expected_values_dict = {}
    if persistence_data is not None:
        for key, value in persistence_data.items():
            # NOTE(review): both branches below guard on `self.target_path_list is not None`, so the elif can never
            # execute — the first condition may have been meant to test a single `target_path` attribute. Confirm intent;
            # behavior is preserved here.
            if self.target_path_list is not None:  # skipcq: PTC-W0048
                if value[3] != self.target_path_list:
                    continue
            elif self.target_path_list is not None and value[3] not in self.target_path_list:
                continue
            # Re-base the check interval when the configured default changed since the data was persisted.
            if value[1] != default_interval:
                value[1] = default_interval
                value[2] = value[0] + default_interval
            self.expected_values_dict[key] = value
        logging.getLogger(DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
    self.analysis_string = 'Analysis.%s'
def __init__(self, aminer_config, anomaly_event_handlers, timestamp_path, analyze_path_list, min_bin_elements, min_bin_time,
             debug_mode=False, persistence_id='Default', output_log_line=True):
    """
    Initialize the detector. This will also trigger reading or creation of persistence storage location.

    @param timestamp_path if not None, use this path value for timestamp based bins.
    @param analyze_path_list list of match paths to analyze in this detector.
    @param min_bin_elements evaluate the latest bin only after at least that number of elements was added to it.
    @param min_bin_time evaluate the latest bin only when the first element is received after min_bin_time has elapsed.
    @param debug_mode if true, generate an analysis report even when average of last bin was within expected range.
    """
    self.anomaly_event_handlers = anomaly_event_handlers
    self.timestamp_path = timestamp_path
    self.min_bin_elements = min_bin_elements
    self.min_bin_time = min_bin_time
    self.debug_mode = debug_mode
    self.next_persist_time = None
    self.persistence_id = persistence_id
    self.output_log_line = output_log_line
    self.aminer_config = aminer_config
    self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
    PersistenceUtil.add_persistable_component(self)
    persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
    # One (path, collected-values) pair per analyzed path, in analyze_path_list order.
    self.stat_data = [(path, []) for path in analyze_path_list]
    if persistence_data is not None:
        for entry in persistence_data:
            if isinstance(entry, str):
                # Legacy string-serialized entry: '["<path>", [<values>]]'.
                parts = entry.strip('[').strip(']').split(',', 2)
                path = parts[0].strip('"')
                # NOTE(review): after the strips this is still a string, so the append loop below adds one character
                # per iteration — confirm that this matches the persisted format before relying on it.
                values = parts[1].strip(' ').strip('[').strip(']')
            else:
                path = entry[0]
                values = entry[1]
            # Locate the slot for this path in stat_data.
            idx = 0
            for stat_path, _ in self.stat_data:
                if stat_path == path:
                    break
                idx += 1
            for item in values:
                self.stat_data[idx][1].append(item)
def __init__(self, aminer_config, anomaly_event_handlers, target_path_list=None, window_size=600, confidence_factor=0.5,
             persistence_id='Default', auto_include_flag=False, output_log_line=True, ignore_list=None, constraint_list=None):
    """
    Initialize the detector. This will also trigger reading or creation of persistence storage location.

    @param aminer_config configuration from analysis_context.
    @param target_path_list parser paths of values to be analyzed. Multiple paths mean that values are analyzed by their
        combined occurrences. When no paths are specified, the events given by the full path list are analyzed.
    @param anomaly_event_handlers for handling events, e.g., print events to stdout.
    @param window_size the length of the time window for counting in seconds.
    @param confidence_factor defines range of tolerable deviation of measured frequency from ground truth frequency gt by
        [gf * confidence_factor, gf / confidence_factor]. confidence_factor must be in range [0, 1].
    @param persistence_id name of persistency document.
    @param auto_include_flag specifies whether new frequency measurements override ground truth frequencies.
    @param output_log_line specifies whether the full parsed log atom should be provided in the output.
    @param ignore_list list of paths that are not considered for analysis, i.e., events that contain one of these paths are
        omitted. The default value is [] as None is not iterable.
    @param constraint_list list of paths that have to be present in the log atom to be analyzed.
    """
    self.target_path_list = target_path_list
    self.anomaly_event_handlers = anomaly_event_handlers
    self.auto_include_flag = auto_include_flag
    self.next_persist_time = None
    self.output_log_line = output_log_line
    self.aminer_config = aminer_config
    self.persistence_id = persistence_id
    # None is normalized to [] so that both lists are always iterable.
    self.constraint_list = [] if constraint_list is None else constraint_list
    self.ignore_list = [] if ignore_list is None else ignore_list
    self.window_size = window_size
    # Out-of-range confidence factors are clamped to 1 (no tolerance band) with a warning.
    if not 0 <= confidence_factor <= 1:
        logging.getLogger(DEBUG_LOG_NAME).warning('confidence_factor must be in the range [0,1]!')
        confidence_factor = 1
    self.confidence_factor = confidence_factor
    self.next_check_time = None
    self.counts = {}
    self.counts_prev = {}
    self.log_total = 0
    self.log_success = 0
    self.log_windows = 0
    self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
    PersistenceUtil.add_persistable_component(self)
    # Persisted data contains lists of event-frequency pairs, i.e., [[<ev1, ev2>, <freq>], [<ev1, ev2>, <freq>], ...]
    persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
    if persistence_data is not None:
        for entry in persistence_data:
            self.counts_prev[tuple(entry[0])] = entry[1]
        logging.getLogger(DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
def __init__(self, aminer_config, anomaly_event_handlers, parallel_check_count, persistence_id='Default',
             record_count_before_event=10000, output_log_line=True, use_path_match=True, use_value_match=True,
             min_rule_attributes=1, max_rule_attributes=5):
    """
    Initialize the detector. This will also trigger reading or creation of persistence storage location.

    @param parallel_check_count number of rule detection checks to run in parallel.
    @param record_count_before_event number of events used to calculate statistics (i.e., window size).
    @param min_rule_attributes minimum number of attributes forming a rule.
    @param max_rule_attributes maximum number of attributes forming a rule.
    @param use_path_match if true rules are built based on path existence.
    @param use_value_match if true rules are built based on actual values.
    """
    self.last_timestamp = 0.0
    self.parallel_check_count = parallel_check_count
    self.anomaly_event_handlers = anomaly_event_handlers
    self.min_rule_attributes = min_rule_attributes
    self.max_rule_attributes = max_rule_attributes
    self.last_unhandled_match = None
    self.total_records = 0
    self.record_count_before_event = record_count_before_event
    self.persistence_id = persistence_id
    self.output_log_line = output_log_line
    self.use_path_match = use_path_match
    self.use_value_match = use_value_match
    self.aminer_config = aminer_config
    self.next_persist_time = time.time() + self.aminer_config.config_properties.get(KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
    self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
    PersistenceUtil.add_persistable_component(self)
    persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
    if persistence_data is None:
        self.feature_list = []
        # Flat count/delta tables sized for parallel_check_count x parallel_check_count rule pairs, two slots each.
        table_size = parallel_check_count * parallel_check_count * 2
        self.event_count_table = [0] * table_size
        self.event_delta_table = [0] * table_size
    else:
        # NOTE(review): persisted state is only acknowledged here, never restored — feature_list and both tables stay
        # undefined on this branch. Confirm whether restoring was intended.
        logging.getLogger(DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
def __init__(self, aminer_config, target_path_list, anomaly_event_handlers, id_path_list, min_allowed_time_diff,
             persistence_id='Default', allow_missing_values_flag=False, auto_include_flag=False, output_log_line=True):
    """
    Initialize the detector. This will also trigger reading or creation of persistence storage location.

    @param target_path_list the list of values to extract from each match to create the value combination to be checked.
    @param id_path_list the list of paths where id values can be stored in all relevant log event types.
    @param min_allowed_time_diff the minimum amount of time in seconds after the first appearance of a log atom with a
        specific id that is waited for other log atoms with the same id to occur. The maximum possible time to keep an
        incomplete combo is 2*min_allowed_time_diff.
    @param allow_missing_values_flag when set to True, the detector will also use matches where one of the paths from
        target_path_list does not refer to an existing parsed data object.
    @param auto_include_flag when set to True, this detector will report a new value only the first time before including
        it in the known values set automatically.
    """
    self.aminer_config = aminer_config
    self.target_path_list = target_path_list
    self.anomaly_event_handlers = anomaly_event_handlers
    self.id_path_list = id_path_list
    self.min_allowed_time_diff = min_allowed_time_diff
    self.allow_missing_values_flag = allow_missing_values_flag
    self.auto_include_flag = auto_include_flag
    self.output_log_line = output_log_line
    self.persistence_id = persistence_id
    # Statistics counters used for periodic status logging.
    self.log_success = 0
    self.log_total = 0
    self.log_learned_path_value_combos = 0
    self.log_new_learned_values = []
    # Persistence handling: load learned state now, schedule the first periodic persist.
    self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
    self.next_persist_time = time.time() + self.aminer_config.config_properties.get(KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
    self.load_persistence_data()
    PersistenceUtil.add_persistable_component(self)
    # Two-generation id tracking; dictionaries are shifted when next_shift_time elapses.
    self.id_dict_current = {}
    self.id_dict_old = {}
    self.next_shift_time = None
def setUp(self):
    """
    Set up all needed variables and remove persisted data.

    Resets the registered persistable components, loads a fresh config and analysis context, wipes and recreates the
    persistence directory, and opens it as the secured base directory.
    """
    PersistenceUtil.persistable_components = []
    self.aminer_config = load_config(self.__configFilePath)
    self.analysis_context = AnalysisContext(self.aminer_config)
    self.output_stream = StringIO()
    self.stream_printer_event_handler = StreamPrinterEventHandler(self.analysis_context, self.output_stream)
    persistence_dir_name = build_persistence_file_name(self.aminer_config)
    if os.path.exists(persistence_dir_name):
        shutil.rmtree(persistence_dir_name)
    # The original re-checked os.path.exists() right after rmtree(), which is always true at that point;
    # makedirs(exist_ok=True) recreates the directory in one race-free call.
    os.makedirs(persistence_dir_name, exist_ok=True)
    initialize_loggers(self.aminer_config, os.getuid(), os.getgid())
    # secure_open_base_directory expects a bytes path.
    if isinstance(persistence_dir_name, str):
        persistence_dir_name = persistence_dir_name.encode()
    SecureOSFunctions.secure_open_base_directory(persistence_dir_name, os.O_RDONLY | os.O_DIRECTORY | os.O_PATH)
    PersistenceUtil.SKIP_PERSISTENCE_ID_WARNING = True
def __init__(self, program_name, aminer_config):
    """Initialize this analysis component, install shutdown signal handlers and register it for time triggers."""
    self.program_name = program_name
    self.aminer_config = aminer_config
    self.analysis_context = AnalysisContext(aminer_config)
    self.run_analysis_loop_flag = True
    self.log_streams_by_name = {}
    self.persistence_file_name = build_persistence_file_name(
        self.analysis_context.aminer_config, self.__class__.__name__ + '/RepositioningData')
    self.next_persist_time = time.time() + self.aminer_config.config_properties.get(KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
    self.repositioning_data_dict = {}
    self.master_control_socket = None
    self.remote_control_socket = None
    # This dictionary provides a lookup list from file descriptor to associated object for handling the data to and from
    # the given descriptor. Currently supported handler objects are:
    # * Parent process socket
    # * Remote control listening socket
    # * LogStreams
    # * Remote control connections
    self.tracked_fds_dict = {}

    # Override the signal handler to allow graceful shutdown.
    def graceful_shutdown_handler(_signo, _stack_frame):
        """React on typical shutdown signals."""
        msg = '%s: caught signal, shutting down' % program_name
        print(msg, file=sys.stderr)
        logging.getLogger(DEBUG_LOG_NAME).info(msg)
        self.run_analysis_loop_flag = False

    import signal
    for sig in (signal.SIGHUP, signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, graceful_shutdown_handler)
    # Do this at the end of the initialization to avoid having partially initialized objects inside the registry.
    self.analysis_context.add_time_triggered_component(self)
def __init__(self, aminer_config, histogram_defs, report_interval, report_event_handlers, reset_after_report_flag=True,
             persistence_id='Default', output_log_line=True):
    """
    Initialize the analysis component.

    @param histogram_defs a list of tuples containing the target property path to analyze and the BinDefinition to apply
        for binning.
    @param report_interval delay in seconds between creation of two reports. The parameter is applied to the parsed record
        data time, not the system time, so reports can be delayed when no data is received.
    """
    self.last_report_time = None
    self.next_report_time = 0.0
    # One HistogramData instance per (path, bin definition) pair.
    self.histogram_data = [HistogramData(path, bin_definition) for path, bin_definition in histogram_defs]
    self.report_interval = report_interval
    self.report_event_handlers = report_event_handlers
    self.reset_after_report_flag = reset_after_report_flag
    self.persistence_id = persistence_id
    self.output_log_line = output_log_line
    self.aminer_config = aminer_config
    self.next_persist_time = time.time() + self.aminer_config.config_properties.get(KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
    self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
    PersistenceUtil.add_persistable_component(self)
    # Restoring persisted histogram state is not implemented yet; fail loudly rather than silently ignoring data on disk.
    persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
    if persistence_data is not None:
        msg = 'No data reading, def merge yet'
        logging.getLogger(DEBUG_LOG_NAME).error(msg)
        raise Exception(msg)
def __init__(self, aminer_config, ruleset, anomaly_event_handlers, persistence_id='Default', output_log_line=True):
    """
    Initialize the detector. This will also trigger reading or creation of persistence storage location.

    @param ruleset a list of MatchRule rules with appropriate CorrelationRules attached as actions.
    """
    self.aminer_config = aminer_config
    self.event_classification_ruleset = ruleset
    self.anomaly_event_handlers = anomaly_event_handlers
    self.next_persist_time = time.time() + self.aminer_config.config_properties.get(KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
    self.persistence_id = persistence_id
    self.output_log_line = output_log_line
    self.last_log_atom = None
    # Collect the distinct correlation rules referenced by the classification rules' actions.
    correlation_rules = set()
    for rule in self.event_classification_ruleset:
        if rule.match_action.artefact_a_rules is not None:
            correlation_rules.update(rule.match_action.artefact_a_rules)
        if rule.match_action.artefact_b_rules is not None:
            correlation_rules.update(rule.match_action.artefact_b_rules)
    self.event_correlation_ruleset = list(correlation_rules)
    self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
    PersistenceUtil.add_persistable_component(self)
def __init__(self, aminer_config, anomaly_event_handlers, id_path_list=None, target_path_list=None, seq_len=3,
             allow_missing_id=False, timeout=-1, persistence_id='Default', auto_include_flag=False, output_log_line=True,
             ignore_list=None, constraint_list=None):
    """
    Initialize the detector. This will also trigger reading or creation of persistence storage location.

    @param aminer_config configuration from analysis_context.
    @param target_path_list parser paths of values to be analyzed. Multiple paths mean that values are analyzed by their
        combined occurrences. When no paths are specified, the events given by the full path list are analyzed.
    @param anomaly_event_handlers for handling events, e.g., print events to stdout.
    @param id_path_list one or more paths that specify the trace of the sequence detection, i.e., incorrect sequences that
        are generated by interleaved events can be avoided when event sequence identifiers are available.
    @param seq_len the length of the sequences to be learned (larger lengths increase precision, but may overfit the data).
    @param allow_missing_id specifies whether log atoms without id path should be omitted (only if id path is set).
    @param timeout maximum allowed seconds between two entries of sequence; sequence is split in subsequences if exceeded.
    @param persistence_id name of persistency document.
    @param auto_include_flag specifies whether new frequency measurements override ground truth frequencies.
    @param output_log_line specifies whether the full parsed log atom should be provided in the output.
    @param ignore_list list of paths that are not considered for analysis, i.e., events that contain one of these paths are
        omitted. The default value is [] as None is not iterable.
    @param constraint_list list of paths that have to be present in the log atom to be analyzed.
    """
    self.target_path_list = target_path_list
    self.anomaly_event_handlers = anomaly_event_handlers
    self.auto_include_flag = auto_include_flag
    self.output_log_line = output_log_line
    self.aminer_config = aminer_config
    self.next_persist_time = time.time() + self.aminer_config.config_properties.get(KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
    self.persistence_id = persistence_id
    self.allow_missing_id = allow_missing_id
    self.timeout = timeout
    # None is normalized to [] so that these lists are always iterable.
    self.id_path_list = [] if id_path_list is None else id_path_list
    self.constraint_list = [] if constraint_list is None else constraint_list
    self.ignore_list = [] if ignore_list is None else ignore_list
    self.seq_len = seq_len
    self.sequences = set()
    self.current_sequences = {}
    self.last_seen_times = {}
    # Statistics counters used for periodic status logging.
    self.log_total = 0
    self.log_success = 0
    self.log_learned = 0
    self.log_learned_sequences = []
    self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
    PersistenceUtil.add_persistable_component(self)
    # Persisted data contains lists of sequences, i.e., [[<seq1_elem1>, <seq1_elem2>], [<seq2_elem1, ...], ...]
    # Thereby, sequence elements may be tuples, i.e., combinations of values, or paths that define events.
    persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
    if persistence_data is not None:
        for sequence in persistence_data:
            self.sequences.add(tuple(tuple(elem) for elem in sequence))
        logging.getLogger(DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
def __init__(self, aminer_config, anomaly_event_handlers, persistence_id='Default', path_list=None, id_path_list=None,
             allow_missing_id=False, allowed_id_tuples=None, min_num_vals=1000, max_num_vals=1500, save_values=True,
             track_time_for_tsa=False, waiting_time_for_tsa=1000, num_sections_waiting_time_for_tsa=100):
    """Initialize the detector. This will also trigger reading or creation of persistence storage location."""
    self.anomaly_event_handlers = anomaly_event_handlers
    # One or more paths that specify the trace of the sequence detection, i.e., incorrect sequences that are generated by
    # interleaved events can be avoided when event sequence identifiers are available (list of strings, defaults to empty
    # list).
    self.id_path_list = [] if id_path_list is None else id_path_list
    # Specifies whether log atoms without id path should be omitted (only if id path is set).
    self.allow_missing_id = allow_missing_id
    # List of the allowed id tuples. Log atoms with id tuples not in this list are not analyzed, when this list is not
    # empty.
    if allowed_id_tuples is None:
        self.allowed_id_tuples = []
    else:
        self.allowed_id_tuples = [tuple(tuple_list) for tuple_list in allowed_id_tuples]
    # Number of the values which the list is being reduced to.
    self.min_num_vals = min_num_vals
    # Maximum number of lines in the value list before it is reduced. > min_num_vals.
    self.max_num_vals = max_num_vals
    # If False the values of the token are not saved for further analysis. Disables self.values and self.check_variables.
    self.save_values = save_values
    # States if the time windows should be tracked for the time series analysis.
    self.track_time_for_tsa = track_time_for_tsa
    # Time in seconds until the time windows are being initialized.
    self.waiting_time_for_tsa = waiting_time_for_tsa
    # Number of sections of the initialization window. The length of the input list of calculate_time_steps is this number.
    self.num_sections_waiting_time_for_tsa = num_sections_waiting_time_for_tsa
    self.aminer_config = aminer_config
    self.next_persist_time = time.time() + self.aminer_config.config_properties.get(KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
    self.num_events = 0
    # List of the longest path of the events.
    self.longest_path = []
    # List of the keys corresponding to the events.
    self.found_keys = []
    # List of the keys which take values in the log line.
    self.variable_key_list = []
    # List of the values of the log lines. If the length reaches max_num_vals the list gets reduced to min_num_vals values
    # per variable.
    self.values = []
    # Saves the number of lines of the event types.
    self.num_eventlines = []
    # Saves the number of total log lines.
    self.total_records = 0
    # List of the modules which follow the event_type_detector. The implemented modules are from the list
    # [variableTypeDetector, variableCorrelationDetector].
    self.following_modules = []
    # List of paths whose variables are being tracked. All other paths will not get tracked. If None all paths are being
    # tracked.
    self.path_list = path_list
    # List of bools which state if the variables of variable_key_list are updated.
    self.check_variables = []
    # List of the time triggers. The first list states the times when something should be triggered, the second list the
    # indices of the event types (or a list of the event type, a path and a value which should be counted; -1 for an
    # initialization), the third list the length of the time step (-1 for a one-time trigger).
    self.etd_time_trigger = [[], [], []]
    # Reference containing the number of lines of the events for the TSA.
    self.num_eventlines_tsa_ref = []
    # Index of the event type of the current log line.
    self.current_index = 0
    # List of the id tuples.
    self.id_path_list_tuples = []
    # Loads the persistence.
    self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
    PersistenceUtil.add_persistable_component(self)
    persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
    # Imports the persistence; the persisted document is a fixed-order list of the attributes restored below.
    if persistence_data is not None:
        self.found_keys = [set(key) for key in persistence_data[0]]
        self.variable_key_list = persistence_data[1]
        self.values = persistence_data[2]
        self.longest_path = persistence_data[3]
        self.check_variables = persistence_data[4]
        self.num_eventlines = persistence_data[5]
        self.etd_time_trigger = persistence_data[6]
        self.num_eventlines_tsa_ref = persistence_data[7]
        self.id_path_list_tuples = [tuple(tuple_list) for tuple_list in persistence_data[8]]
        self.num_events = len(self.found_keys)
    elif self.track_time_for_tsa:
        # Seed each trigger list with the initialization marker.
        for trigger_list in self.etd_time_trigger:
            trigger_list.append(-1)
def __init__(self, aminer_config, anomaly_event_handlers, target_path_list=None, window_size=600, num_windows=50,
             confidence_factor=0.33, empty_window_warnings=True, early_exceeding_anomaly_output=False, set_lower_limit=None,
             set_upper_limit=None, persistence_id='Default', auto_include_flag=False, output_log_line=True, ignore_list=None,
             constraint_list=None):
    """
    Initialize the detector. This will also trigger reading or creation of persistence storage location.
    @param aminer_config configuration from analysis_context.
    @param anomaly_event_handlers for handling events, e.g., print events to stdout.
    @param target_path_list parser paths of values to be analyzed. Multiple paths mean that values are analyzed by their combined
           occurrences. When no paths are specified, the events given by the full path list are analyzed.
    @param window_size the length of the time window for counting in seconds.
    @param num_windows the number of previous time windows considered for expected frequency estimation.
    @param confidence_factor defines range of tolerable deviation of measured frequency from expected frequency according to
           occurrences_mean +- occurrences_std / self.confidence_factor. Default value is 0.33 = 3*sigma deviation.
           confidence_factor must be in range [0, 1].
    @param empty_window_warnings whether anomalies should be generated for too small window sizes.
    @param early_exceeding_anomaly_output states if an anomaly should be raised the first time the appearance count exceeds the range.
    @param set_lower_limit sets the lower limit of the frequency test to the specified value.
    @param set_upper_limit sets the upper limit of the frequency test to the specified value.
    @param persistence_id name of persistence document.
    @param auto_include_flag specifies whether new frequency measurements override ground truth frequencies.
    @param output_log_line specifies whether the full parsed log atom should be provided in the output.
    @param ignore_list list of paths that are not considered for analysis, i.e., events that contain one of these paths are omitted.
           The default value is [] as None is not iterable.
    @param constraint_list list of paths that have to be present in the log atom to be analyzed.
    """
    self.target_path_list = target_path_list
    self.anomaly_event_handlers = anomaly_event_handlers
    self.auto_include_flag = auto_include_flag
    self.output_log_line = output_log_line
    self.aminer_config = aminer_config
    # Schedule the first persistence write one configured period from now.
    self.next_persist_time = time.time() + self.aminer_config.config_properties.get(
        KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD)
    self.persistence_id = persistence_id
    # None defaults are replaced by [] because None is not iterable.
    self.constraint_list = [] if constraint_list is None else constraint_list
    self.ignore_list = [] if ignore_list is None else ignore_list
    self.window_size = window_size
    self.num_windows = num_windows
    if not 0 <= confidence_factor <= 1:
        # An out-of-range factor would break the tolerance computation; fall back to the widest tolerance.
        logging.getLogger(DEBUG_LOG_NAME).warning('confidence_factor must be in the range [0,1]!')
        confidence_factor = 1
    self.confidence_factor = confidence_factor
    self.empty_window_warnings = empty_window_warnings
    self.early_exceeding_anomaly_output = early_exceeding_anomaly_output
    self.set_lower_limit = set_lower_limit
    self.set_upper_limit = set_upper_limit
    self.next_check_time = None
    self.counts = {}
    self.ranges = {}
    self.exceeded_range_frequency = {}
    self.log_total = 0
    self.log_success = 0
    self.log_windows = 0
    self.persistence_file_name = build_persistence_file_name(aminer_config, self.__class__.__name__, persistence_id)
    PersistenceUtil.add_persistable_component(self)
    # Persisted data contains lists of event-frequency pairs, i.e., [[<ev>, [<freq1, freq2>]], [<ev>, [<freq1, freq2>]], ...]
    persistence_data = PersistenceUtil.load_json(self.persistence_file_name)
    if persistence_data is not None:
        for entry in persistence_data:
            log_event, freqs = entry[0], entry[1]
            # In case that num_windows differ, only take as many as possible; the appended 0 is the counter for the current window.
            self.counts[tuple(log_event)] = freqs[max(0, len(freqs) - num_windows - 1):] + [0]
        logging.getLogger(DEBUG_LOG_NAME).debug('%s loaded persistence data.', self.__class__.__name__)
def __init__(self, aminer_config, anomaly_event_handlers, path_list=None, id_path_list=None, ignore_list=None, allow_missing_id=False, num_log_lines_solidify_matrix=100, time_output_threshold=0, anomaly_threshold=0.05, persistence_id='Default', auto_include_flag=False, output_log_line=True): """ Initialize the detector. This will also trigger reading or creation of persistence storage location. @param aminer_config configuration from analysis_context. @param anomaly_event_handlers for handling events, e.g., print events to stdout. @param path_list parser paths of values to be analyzed. Multiple paths mean that values are analyzed by their combined occurrences. When no paths are specified, the events given by the full path list are analyzed. @param id_path_list the list of paths where id values can be stored in all relevant log event types. @param ignore_list list of paths that are not considered for analysis, i.e., events that contain one of these paths are omitted. The default value is [] as None is not iterable. @param allow_missing_id when set to True, the detector will also use matches, where one of the pathes from target_path_list does not refer to an existing parsed data object. @param num_log_lines_solidify_matrix number of processed log lines after which the matrix is solidified. This process is periodically repeated. @param time_output_threshold threshold for the tested minimal transition time which has to be exceeded to be tested. @param anomaly_threshold threshold for the confidence which must be exceeded to raise an anomaly. @param persistence_id name of persistency document. @param auto_include_flag specifies whether newly observed sequences should be added to the learned model @param output_log_line specifies whether the full parsed log atom should be provided in the output. 
""" # Input parameters self.aminer_config = aminer_config self.anomaly_event_handlers = anomaly_event_handlers self.path_list = path_list if self.path_list is None: self.path_list = [] self.id_path_list = id_path_list if self.id_path_list is None: self.id_path_list = [] self.ignore_list = ignore_list if self.ignore_list is None: self.ignore_list = [] self.allow_missing_id = allow_missing_id self.num_log_lines_solidify_matrix = num_log_lines_solidify_matrix self.time_output_threshold = time_output_threshold self.anomaly_threshold = anomaly_threshold self.persistence_id = persistence_id self.auto_include_flag = auto_include_flag self.output_log_line = output_log_line self.next_persist_time = time.time( ) + self.aminer_config.config_properties.get( KEY_PERSISTENCE_PERIOD, DEFAULT_PERSISTENCE_PERIOD) # Test if both path_list and id_path_list are not empty if [] in (self.path_list, self.id_path_list): msg = 'Both paths and id_path_list must not be empty.' logging.getLogger(DEBUG_LOG_NAME).warning(msg) print('WARNING: ' + msg, file=sys.stderr) # Initialization auxiliary variables self.time_matrix = {} self.last_value = {} self.last_time = {} self.log_total = 0 # Load persistency self.persistence_file_name = build_persistence_file_name( aminer_config, self.__class__.__name__, persistence_id) PersistenceUtil.add_persistable_component(self) persistence_data = PersistenceUtil.load_json( self.persistence_file_name) if persistence_data is not None: return_matrix = persistence_data[0] keys_1 = [tuple(key) for key in persistence_data[1]] keys_2 = [[tuple(key) for key in persistence_data[2][i]] for i in range(len(persistence_data[2]))] self.time_matrix = { keys_1[i]: { keys_2[i][j]: return_matrix[i][j] for j in range(len(keys_2[i])) } for i in range(len(keys_1)) }