def test3append_values_float(self):
    """Check append_values when the raw match object is a float, an int or a numeric bytestring."""
    detector = EventTypeDetector(self.aminer_config, [self.stream_printer_event_handler])
    # initialize all values.
    timestamp = time.time()
    source = self.__class__.__name__

    # Float match object: the first atom is also passed through receive_atom.
    float_atom = LogAtom(b'22.2', ParserMatch(MatchElement('path', '22.2', 22.2, None)), timestamp, source)
    detector.receive_atom(float_atom)
    detector.values = [[[]]]
    detector.append_values(float_atom, 0)
    self.assertEqual(detector.values, [[[22.2]]])

    # Integer match object.
    int_atom = LogAtom(b'22', ParserMatch(MatchElement('path', '22', 22, None)), timestamp, source)
    detector.values = [[[]]]
    detector.append_values(int_atom, 0)
    self.assertEqual(detector.values, [[[22]]])

    # Bytestring match object holding an integer literal.
    bytes_atom = LogAtom(b'22.2', ParserMatch(MatchElement('path', '22', b'22', None)), timestamp, source)
    detector.values = [[[]]]
    detector.append_values(bytes_atom, 0)
    self.assertEqual(detector.values, [[[22]]])
def test4append_values_bytestring(self):
    """
    Check append_values when the raw match object is a non-numeric bytestring.

    The expected outcome is that the match_string is stored in the value list.
    """
    detector = EventTypeDetector(self.aminer_config, [self.stream_printer_event_handler])
    # initialize all values.
    timestamp = time.time()
    source = self.__class__.__name__

    text_element = MatchElement('path', 'This is a string', b'This is a string', None)
    text_atom = LogAtom(b'This is a string', ParserMatch(text_element), timestamp, source)
    detector.receive_atom(text_atom)
    detector.values = [[[]]]
    detector.append_values(text_atom, 0)
    self.assertEqual(detector.values, [[['This is a string']]])

    # Looks like a number at first glance but is not parseable as one.
    date_element = MatchElement('path', '24.05.', b'24.05.', None)
    date_atom = LogAtom(b'24.05.', ParserMatch(date_element), timestamp, source)
    detector.values = [[[]]]
    detector.append_values(date_atom, 0)
    self.assertEqual(detector.values, [[['24.05.']]])
def run_time_correlation_detector(self, number_of_rules):
    """
    Benchmark TimeCorrelationDetector.receive_atom.

    For each of self.iterations rounds a fresh detector is fed atoms for self.waiting_time seconds. The per-round atom
    counts and their average are appended to the class-level result string.
    """
    results = [None] * self.iterations
    for run in range(self.iterations):
        detector = TimeCorrelationDetector(
            self.aminer_config, 2, number_of_rules, 0, [self.stream_printer_event_handler],
            record_count_before_event=self.waiting_time * 9000)
        t = time.time()
        started = time.time()
        processed = 0
        while int(time.time() - started) < self.waiting_time:
            model_element = DecimalIntegerValueModelElement(
                'd', DecimalIntegerValueModelElement.SIGN_TYPE_NONE, DecimalIntegerValueModelElement.PAD_TYPE_NONE)
            context = MatchContext(str(processed % 100).encode())
            match_element = model_element.get_match_element('integer', context)
            atom = LogAtom(match_element.match_string, ParserMatch(match_element), t, detector)
            detector.receive_atom(atom)
            processed += 1
        results[run] = processed
    avg = sum(results) / self.iterations
    type(self).result = self.result + self.result_string % (
        detector.__class__.__name__, avg, results, 'testCount=%d.' % number_of_rules)
def test2receive_event_with_same_event_data_attributes(self):
    """Pass event_data that already contains an AnalysisComponent 'Message' and expect a JsonError in the output."""
    handler = JsonConverterHandler([self.stream_printer_event_handler], self.analysis_context)
    log_atom = LogAtom(self.fixed_dme.fixed_data, ParserMatch(self.match_element), self.t, self)
    self.analysis_context.register_component(self, self.description)

    # 'Message' collides with the attribute the handler wants to set itself.
    analysis_component = {
        'AffectedParserPaths': ['test/path/1', 'test/path/2'],
        'Message': 'An other event happened too!'
    }
    event_data = {'AnalysisComponent': analysis_component}
    handler.receive_event(self.test_detector, self.event_message, self.sorted_log_lines, event_data, log_atom, self)

    class_name = self.__class__.__name__
    json_error = ',\n "JsonError": "AnalysisComponent attribute \'Message\' is already in use and can not be overwritten!\\n"'
    expected = self.expected_string % (
        datetime.fromtimestamp(self.t).strftime("%Y-%m-%d %H:%M:%S"), self.event_message, class_name,
        self.description, class_name, self.description, self.event_message, self.persistence_id,
        round(float("%.2f" % self.t), 2), json_error)
    self.assertEqual(self.output_stream.getvalue(), expected)
def run_new_match_path_detector(self, number_of_pathes):
    """
    Benchmark NewMatchPathDetector.receive_atom with number_of_pathes distinct match paths.

    The per-round atom counts and their average are appended to the class-level result string.
    """
    results = [None] * self.iterations
    for run in range(self.iterations):
        detector = NewMatchPathDetector(self.aminer_config, [self.stream_printer_event_handler], 'Default', True)
        t = round(time.time(), 3)
        started = time.time()
        processed = 0
        while int(time.time() - started) < self.waiting_time:
            model_element = DecimalIntegerValueModelElement(
                'd' + str(processed % number_of_pathes), DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                DecimalIntegerValueModelElement.PAD_TYPE_NONE)
            context = MatchContext(str(processed).encode())
            match_element = model_element.get_match_element('integer', context)
            atom = LogAtom(match_element.match_string, ParserMatch(match_element), t, detector)
            detector.receive_atom(atom)
            processed += 1
        results[run] = processed
    avg = sum(results) / self.iterations
    type(self).result = self.result + self.result_string % (
        detector.__class__.__name__, avg, results, self.different_pathes % number_of_pathes)
def test1receive_serialized_data(self):
    """Feed JSON produced by the JsonConverterHandler into the KafkaEventHandler and check the consumed message."""
    converter = JsonConverterHandler([self.stream_printer_event_handler], self.analysis_context)
    log_atom = LogAtom(self.fixed_dme.fixed_data, ParserMatch(self.match_element), self.t, self)
    self.analysis_context.register_component(self, self.description)
    event_data = {'AnalysisComponent': {'AffectedParserPaths': ['test/path/1', 'test/path/2']}}
    converter.receive_event(self.test_detector, self.event_message, self.sorted_log_lines, event_data, log_atom, self)
    serialized = self.output_stream.getvalue()

    kafka_options = {'bootstrap_servers': ['localhost:9092'], 'api_version': (2, 0, 1)}
    kafka_handler = KafkaEventHandler(self.analysis_context, self.kafka_topic, kafka_options)
    accepted = kafka_handler.receive_event(
        self.test_detector, self.event_message, self.sorted_log_lines, serialized, log_atom, self)
    self.assertTrue(accepted)

    class_name = self.__class__.__name__
    expected = self.expected_string % (
        datetime.fromtimestamp(self.t).strftime("%Y-%m-%d %H:%M:%S"), self.event_message, class_name,
        self.description, class_name, self.description, self.event_message, self.persistence_id,
        round(self.t, 2), "")
    self.assertEqual(self.consumer.__next__().value, expected)
def whitelist_event_in_component(self, analysis_context, component_name, event_data, whitelisting_data=None):
    """
    Whitelist an event in the named analysis component and append the outcome to REMOTE_CONTROL_RESPONSE.

    @param analysis_context the context used to look the component up by name.
    @param component_name the registered name of the target component.
    @param event_data the event data handed to the component's whitelist_event method.
    @param whitelisting_data optional extra data forwarded unchanged to whitelist_event.
    """
    component = analysis_context.get_component_by_name(component_name)
    if component is None:
        # Report the requested name; `component` itself is None here and would only ever print 'None'.
        self.REMOTE_CONTROL_RESPONSE += "FAILURE: component '%s' does not exist!" % component_name
        return

    class_name = component.__class__.__name__
    supported_classes = (
        "EnhancedNewMatchPathValueComboDetector", "MissingMatchPathValueDetector", "NewMatchPathDetector",
        "NewMatchPathValueComboDetector")
    if class_name not in supported_classes:
        self.REMOTE_CONTROL_RESPONSE += "FAILURE: component class '%s' does not support whitelisting! Only the following classes " \
            "support whitelisting: EnhancedNewMatchPathValueComboDetector, MissingMatchPathValueDetector," \
            " NewMatchPathDetector and NewMatchPathValueComboDetector." % class_name
        return

    try:
        if class_name == "MissingMatchPathValueDetector":
            # This detector receives the event data as-is.
            self.REMOTE_CONTROL_RESPONSE += component.whitelist_event(
                "Analysis.%s" % class_name, [class_name], event_data, whitelisting_data)
        else:
            # The other detectors get a placeholder LogAtom paired with the event data.
            self.REMOTE_CONTROL_RESPONSE += component.whitelist_event(
                "Analysis.%s" % class_name, [class_name], [LogAtom("", None, 1666.0, None), event_data],
                whitelisting_data)
    # Any failure inside the component is reported back in the response instead of being raised.
    # skipcq: PYL-W0703
    except Exception as e:
        self.REMOTE_CONTROL_RESPONSE += "Exception: " + repr(e)
def test2receive_atoms_with_defined_path_list(self):
    """
    Receive multiple log_atoms with an EventTypeDetector that has a static path_list.

    Only the configured path is used, so variable_key_list should not come into play. For atoms flagged in
    `results` the detector is expected to return False and leave its counters unchanged.
    """
    detector = EventTypeDetector(
        self.aminer_config, [self.stream_printer_event_handler], path_list=['parser/type/path/nametype'])
    results = [
        True, False, True, False, True, False, True, True, False, False, True, True, False, True, False, True,
        False, True, False, False, True
    ]
    source = self.__class__.__name__
    log_atoms = []
    for line in self.log_lines:
        t = time.time()
        match_element = self.parsing_model.get_match_element('parser', MatchContext(line))
        log_atoms.append(LogAtom(line, ParserMatch(match_element), t, source))

    for i, log_atom in enumerate(log_atoms):
        before = (detector.num_events, detector.num_eventlines, detector.total_records, detector.longest_path)
        self.assertEqual(detector.receive_atom(log_atom), not results[i], i)
        if results[i]:
            after = (detector.num_events, detector.num_eventlines, detector.total_records, detector.longest_path)
            self.assertEqual(before, after)
def test1log_atom_not_in_path_list(self):
    """This unittest checks if no action happens, when no path in the match_dictionary matches a target_path."""
    import copy

    parser_count = ParserCount(
        self.aminer_config, ['fixed/seq', 'fixed/seq/m1', 'fixed/seq/m2'], [self.stream_printer_event_handler])
    t = time.time()
    log_atom = LogAtom(self.fixed_dme_m3.fixed_data, ParserMatch(self.match_element_m3), t, parser_count)
    # Deep copy: a shallow dict() snapshot would share any nested per-path counters with the detector, so an
    # in-place change made by receive_atom would show up on both sides and the assertion could never fail.
    old_count_dict = copy.deepcopy(parser_count.count_dict)
    parser_count.receive_atom(log_atom)
    self.assertEqual(parser_count.count_dict, old_count_dict)
def test6receive_atom_without_target_paths(self):
    """Check receive_atom with a sequence match when the ParserCount was created without target_paths."""
    parser_count = ParserCount(self.aminer_config, None, [self.stream_printer_event_handler])
    timestamp = time.time()
    parser_match = ParserMatch(self.match_element_seq)
    log_atom = LogAtom(self.match_context_seq.match_data, parser_match, timestamp, parser_count)

    # Expected state: the previous counts plus one processed line for 'fixed/seq'.
    expected = dict(parser_count.count_dict)
    expected['fixed/seq'] = {current_processed_lines_str: 1, total_processed_lines_str: 1}

    parser_count.receive_atom(log_atom)
    self.assertEqual(parser_count.count_dict, expected)
def test2log_atom_matches_single_path(self):
    """This unittest tests the receive_atom method with a single path matching."""
    import copy

    parser_count = ParserCount(
        self.aminer_config, ['fixed/seq', 'fixed/seq/m1', 'fixed/seq/m2', 'fixed/m3'],
        [self.stream_printer_event_handler])
    t = time.time()
    log_atom = LogAtom(self.fixed_dme_m3.fixed_data, ParserMatch(self.match_element_m3), t, parser_count)
    # Deep copy so that writing the expected counters below does not modify the detector's own nested dict;
    # with a shallow dict() copy both sides of the assertion would be the very same inner object.
    old_count_dict = copy.deepcopy(parser_count.count_dict)
    old_count_dict['fixed/m3'][current_processed_lines_str] = 1
    old_count_dict['fixed/m3'][total_processed_lines_str] = 1
    parser_count.receive_atom(log_atom)
    self.assertEqual(parser_count.count_dict, old_count_dict)
def test4do_timer(self):
    """Check the values returned by do_timer and that output is only produced once the interval has passed."""
    parser_count = ParserCount(self.aminer_config, ['fixed/m3'], [self.stream_printer_event_handler], 600)
    now = time.time()

    # First call: full interval returned, nothing printed.
    first_wait = int(parser_count.do_timer(now + 100))
    self.assertEqual(first_wait, 600)
    self.assertEqual(self.output_stream.getvalue(), "")

    parser_match = ParserMatch(self.match_element_seq)
    log_atom = LogAtom(self.match_context_seq.match_data, parser_match, now, parser_count)
    parser_count.receive_atom(log_atom)

    # After an atom was received the remaining wait time shrinks; still no output.
    second_wait = int(parser_count.do_timer(now + 100))
    self.assertEqual(second_wait, 500)
    self.assertEqual(self.output_stream.getvalue(), "")

    # Past the interval: the timer restarts and output is written.
    self.assertEqual(parser_count.do_timer(now + 601), 600)
    self.assertNotEqual(self.output_stream.getvalue(), "")
    self.reset_output_stream()
def run_match_value_stream_writer(self, number_of_pathes):
    """
    Benchmark MatchValueStreamWriter.receive_atom on a sequence of alternating integer and ' Euro ' elements.

    The time spent building the input data is added to the start time so it does not count against the
    measurement window. Per-round atom counts and their average are appended to the class-level result string.
    """
    results = [None] * self.iterations
    for run in range(self.iterations):
        path_list = []
        parsing_model = []
        idx = 0
        while idx < number_of_pathes / 2:
            suffix = str(idx % number_of_pathes)
            path_list.append('match/integer/d' + suffix)
            path_list.append('match/integer/s' + suffix)
            parsing_model.append(DecimalIntegerValueModelElement(
                'd' + suffix, DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                DecimalIntegerValueModelElement.PAD_TYPE_NONE))
            parsing_model.append(FixedDataModelElement('s' + suffix, b' Euro '))
            idx += 1
        sequence_model_element = SequenceModelElement('integer', parsing_model)
        writer = MatchValueStreamWriter(self.output_stream, path_list, b';', b'-')
        t = time.time()
        started = time.time()
        processed = 0
        while int(time.time() - started) < self.waiting_time:
            p = process_time()
            upper = int(number_of_pathes / 2) + number_of_pathes % 2 + 1
            data = b''.join(str(j).encode() + b' Euro ' for j in range(1, upper))
            # Exclude the data generation from the measured time.
            started = started + process_time() - p
            match_element = sequence_model_element.get_match_element('match', MatchContext(data))
            atom = LogAtom(match_element.match_object, ParserMatch(match_element), t, writer)
            writer.receive_atom(atom)
            processed += 1
        results[run] = processed
    avg = sum(results) / self.iterations
    type(self).result = self.result + self.result_string % (
        writer.__class__.__name__, avg, results, self.different_pathes % number_of_pathes)
def test5check_value_reduction(self):
    """
    Check that the stored values are reduced once the max_num_vals threshold is exceeded.

    The detector is first filled with max_num_vals values; one further value is then expected to leave only the
    last min_num_vals values in place.
    """
    event_type_detector = EventTypeDetector(
        self.aminer_config, [self.stream_printer_event_handler])
    t = time.time()
    # Expected contents of event_type_detector.values, built up alongside the received atoms.
    val_list = [[[]]]
    # NOTE(review): the source formatting was collapsed; the equality check is assumed to sit inside the
    # loop - confirm against the original file.
    for i in range(1, event_type_detector.max_num_vals + 1, 1):
        log_atom = LogAtom(
            str(i).encode(),
            ParserMatch(MatchElement('path', str(i), i, None)), t,
            self.__class__.__name__)
        val_list[0][0].append(float(i))
        self.assertTrue(event_type_detector.receive_atom(log_atom))
        self.assertEqual(event_type_detector.values, val_list)
    # One more value than max_num_vals; reuses the loop variable to continue the sequence.
    i += 1
    log_atom = LogAtom(
        str(i).encode(), ParserMatch(MatchElement('path', str(i), i, None)),
        t, self.__class__.__name__)
    val_list[0][0].append(float(i))
    self.assertTrue(event_type_detector.receive_atom(log_atom))
    # Only the most recent min_num_vals values are expected to remain.
    self.assertEqual(
        event_type_detector.values,
        [[val_list[0][0][-event_type_detector.min_num_vals:]]])
def run_atom_filters_match_value_filter(self, number_of_pathes):
    """
    Benchmark a SubhandlerFilter that holds number_of_pathes MatchValueFilters.

    Every MatchValueFilter shares one dictionary with 1000000 integer keys that all map to the same
    NewMatchPathDetector. Per-round atom counts and their average are appended to the class-level result string.
    """
    results = [None] * self.iterations
    for run in range(self.iterations):
        detector = NewMatchPathDetector(self.aminer_config, [self.stream_printer_event_handler], 'Default', True)
        subhandler_filter = SubhandlerFilter([], stop_when_handled_flag=True)
        dictionary = dict.fromkeys(range(1000000), detector)
        for idx in range(number_of_pathes):
            match_value_filter = MatchValueFilter(self.integerd + str(idx % number_of_pathes), dictionary, None)
            subhandler_filter.add_handler(match_value_filter, stop_when_handled_flag=True)
        t = round(time.time(), 3)
        started = time.time()
        processed = 0
        while int(time.time() - started) < self.waiting_time:
            model_element = DecimalIntegerValueModelElement(
                'd' + str(processed % number_of_pathes), DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                DecimalIntegerValueModelElement.PAD_TYPE_NONE)
            context = MatchContext(str(processed).encode())
            match_element = model_element.get_match_element('integer', context)
            atom = LogAtom(match_element.match_string, ParserMatch(match_element), t, match_value_filter)
            subhandler_filter.receive_atom(atom)
            processed += 1
        results[run] = processed
    avg = sum(results) / self.iterations
    description = '%d different %ss with a dictionary of %ss.' % (
        number_of_pathes, match_value_filter.__class__.__name__, detector.__class__.__name__)
    type(self).result = self.result + self.result_string % (
        subhandler_filter.__class__.__name__, avg, results, description)
def test3log_atom_matches_multiple_paths(self):
    """Check receive_atom with a sequence match that hits several of the configured target paths."""
    target_paths = ['fixed/seq', 'fixed/seq/m1', 'fixed/seq/m2', 'fixed/m3']
    parser_count = ParserCount(self.aminer_config, target_paths, [self.stream_printer_event_handler])
    timestamp = time.time()
    parser_match = ParserMatch(self.match_element_seq)
    log_atom = LogAtom(self.match_context_seq.match_data, parser_match, timestamp, parser_count)

    # Every path of the sequence is expected to be counted once; 'fixed/m3' stays untouched.
    expected = dict(parser_count.count_dict)
    for path in ('fixed/seq', 'fixed/seq/m1', 'fixed/seq/m2'):
        expected[path] = 1

    parser_count.receive_atom(log_atom)
    self.assertEqual(parser_count.count_dict, expected)
def run_whitelist_violation_detector(self, number_of_pathes, modulo_factor):
    """
    Benchmark WhitelistViolationDetector.receive_atom with number_of_pathes PathExistsMatchRules.

    A random draw compared against modulo_factor decides whether the path index is taken modulo
    number_of_pathes or modulo twice that value. The time spent on the random draw is excluded from the
    measurement window. Results are appended to the class-level result string.
    """
    results = [None] * self.iterations
    for run in range(self.iterations):
        rules = [
            PathExistsMatchRule(self.integerd + str(idx % number_of_pathes), None)
            for idx in range(number_of_pathes)]
        detector = WhitelistViolationDetector(self.aminer_config, rules, [self.stream_printer_event_handler])
        t = time.time()
        started = time.time()
        processed = 0
        while int(time.time() - started) < self.waiting_time:
            p = process_time()
            draw = random.randint(1, 100)
            factor = 2 if draw >= modulo_factor else 1
            started = started + process_time() - p
            model_element = DecimalIntegerValueModelElement(
                'd' + str(processed % (number_of_pathes * factor)), DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                DecimalIntegerValueModelElement.PAD_TYPE_NONE)
            context = MatchContext(str(processed % 100).encode())
            match_element = model_element.get_match_element('integer', context)
            atom = LogAtom(match_element.match_string, ParserMatch(match_element), t, detector)
            detector.receive_atom(atom)
            processed += 1
        results[run] = processed
    avg = sum(results) / self.iterations
    description = '%d different PathExistsMatchRules and a moduloFactor of %d.' % (number_of_pathes, modulo_factor)
    type(self).result = self.result + self.result_string % (detector.__class__.__name__, avg, results, description)
def run_missing_match_path_value_detector(self, number_of_pathes):
    """
    Benchmark MissingMatchPathListValueDetector.receive_atom with number_of_pathes observed paths.

    The atom timestamp advances by 4000 whenever the random draw hits 100. The time spent on the random draw is
    excluded from the measurement window. Results are appended to the class-level result string.
    """
    results = [None] * self.iterations
    for run in range(self.iterations):
        path_list = [self.integerd + str(idx % number_of_pathes) for idx in range(number_of_pathes)]
        detector = MissingMatchPathListValueDetector(
            self.aminer_config, path_list, [self.stream_printer_event_handler], 'Default', True, 3600, 86400)
        started = time.time()
        t = started
        processed = 0
        while int(time.time() - started) < self.waiting_time:
            model_element = DecimalIntegerValueModelElement(
                'd' + str(processed % number_of_pathes), DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                DecimalIntegerValueModelElement.PAD_TYPE_NONE)
            context = MatchContext(str(1).encode())
            match_element = model_element.get_match_element('integer', context)
            p = process_time()
            draw = random.randint(0, 100)
            started = started + process_time() - p
            t += 4000 * int(draw / 100)
            atom = LogAtom(match_element.match_object, ParserMatch(match_element), t, detector)
            detector.receive_atom(atom)
            processed += 1
        results[run] = processed
    avg = sum(results) / self.iterations
    type(self).result = self.result + self.result_string % (
        detector.__class__.__name__, avg, results, self.different_pathes % number_of_pathes)
def test1receive_atoms_with_default_values(self):
    """
    Receive multiple log_atoms with an EventTypeDetector created with default values.

    No path_list is passed, so the paths are learned dynamically. Every atom must be accepted and counted.
    """
    detector = EventTypeDetector(self.aminer_config, [self.stream_printer_event_handler])
    source = self.__class__.__name__
    log_atoms = []
    for line in self.log_lines:
        t = time.time()
        match_element = self.parsing_model.get_match_element('parser', MatchContext(line))
        log_atoms.append(LogAtom(line, ParserMatch(match_element), t, source))

    for expected_total, log_atom in enumerate(log_atoms, start=1):
        self.assertTrue(detector.receive_atom(log_atom))
        self.assertEqual(detector.total_records, expected_total)
def test2receive_non_serialized_data(self):
    """Pass a plain dict instead of serialized data to receive_event; it must be rejected and nothing sent."""
    log_atom = LogAtom(self.fixed_dme.fixed_data, ParserMatch(self.match_element), self.t, self)
    self.analysis_context.register_component(self, self.description)
    event_data = {'AnalysisComponent': {'AffectedParserPaths': ['test/path/1', 'test/path/2']}}

    kafka_options = {'bootstrap_servers': ['localhost:9092'], 'api_version': (2, 0, 1)}
    kafka_handler = KafkaEventHandler(self.analysis_context, self.kafka_topic, kafka_options)
    accepted = kafka_handler.receive_event(
        self.test_detector, self.event_message, self.sorted_log_lines, event_data, log_atom, self)
    self.assertFalse(accepted)
    # No message may have reached the consumer.
    self.assertRaises(StopIteration, self.consumer.__next__)
def test1receive_expected_event(self):
    """Send a regular event through the JsonConverterHandler and compare the JSON written to the stream printer."""
    handler = JsonConverterHandler([self.stream_printer_event_handler], self.analysis_context)
    log_atom = LogAtom(self.fixed_dme.fixed_data, ParserMatch(self.match_element), self.t, self)
    self.analysis_context.register_component(self, self.description)
    event_data = {'AnalysisComponent': {'AffectedParserPaths': ['test/path/1', 'test/path/2']}}
    handler.receive_event(self.test_detector, self.event_message, self.sorted_log_lines, event_data, log_atom, self)

    class_name = self.__class__.__name__
    expected = self.expected_string % (
        datetime.fromtimestamp(self.t).strftime("%Y-%m-%d %H:%M:%S"), self.event_message, class_name,
        self.description, class_name, self.description, self.event_message, self.persistence_id,
        round(self.t, 2), "")
    self.assertEqual(self.output_stream.getvalue(), expected)
def test6persist_and_load_data(self):
    """Persist the state of an EventTypeDetector and check that a new instance holds the same values."""
    original = EventTypeDetector(self.aminer_config, [self.stream_printer_event_handler])
    timestamp = time.time()
    match_element = MatchElement('path', '22.2', 22.2, None)
    log_atom = LogAtom(b'22.2', ParserMatch(match_element), timestamp, self.__class__.__name__)
    original.receive_atom(log_atom)
    original.do_persist()

    reloaded = EventTypeDetector(self.aminer_config, [self.stream_printer_event_handler])
    persisted_attributes = ('variable_key_list', 'values', 'longest_path', 'check_variables', 'num_eventlines')
    for attribute in persisted_attributes:
        self.assertEqual(getattr(original, attribute), getattr(reloaded, attribute))
def run_timestamps_unsorted_detector(self, reset_factor):
    """
    Benchmark TimestampsUnsortedDetector.receive_atom with randomly offset timestamps.

    The running minimum of the random offsets is raised by reset_factor whenever a draw does not undercut it.
    The time spent on the random draw is excluded from the measurement window. Results are appended to the
    class-level result string.
    """
    results = [None] * self.iterations
    for run in range(self.iterations):
        detector = TimestampsUnsortedDetector(self.aminer_config, [self.stream_printer_event_handler])
        started = time.time()
        base_time = started
        processed = 0
        lowest = 100
        while int(time.time() - started) < self.waiting_time:
            model_element = DecimalIntegerValueModelElement(
                'd', DecimalIntegerValueModelElement.SIGN_TYPE_NONE, DecimalIntegerValueModelElement.PAD_TYPE_NONE)
            p = process_time()
            draw = random.randint(1, 100)
            started = started + process_time() - p
            context = MatchContext(str(processed).encode())
            match_element = model_element.get_match_element('integer', context)
            atom = LogAtom(
                match_element.match_string, ParserMatch(match_element), base_time + min(draw, lowest), detector)
            detector.receive_atom(atom)
            lowest = draw if lowest > draw else lowest + reset_factor
            processed += 1
        results[run] = processed
    avg = sum(results) / self.iterations
    type(self).result = self.result + self.result_string % (
        detector.__class__.__name__, avg, results, 'a resetFactor of %f.' % reset_factor)
def test3receive_match_in_time_without_auto_include_flag(self):
    """Check the per-atom output and id_dict_current of the detector when auto_include_flag is False."""
    description = 'test3newMatchIdValueComboDetectorTest'
    syscall = 'parser/type/syscall/syscall'
    name = 'parser/type/path/name'
    # Whether the output stream is expected to be empty after each atom.
    output_stream_empty_results = [
        True, False, True, False, True, False, True, True, False, True, False, True, True, True, False, False,
        False, True, False, True, False]
    # Expected id_dict_current after each atom.
    id_dict_current_results = [
        {1: {syscall: 1}},
        {},
        {2: {syscall: 2}},
        {},
        {3: {syscall: 3}},
        {},
        {100: {syscall: 1}},
        {100: {syscall: 1}, 4: {syscall: 1}},
        {100: {syscall: 1}},
        {100: {syscall: 1}, 5: {name: 'two'}},
        {100: {syscall: 1}},
        {100: {syscall: 1}, 6: {syscall: 4}},
        {100: {syscall: 1}, 6: {syscall: 4}, 7: {name: 'five'}},
        {100: {syscall: 1}, 6: {syscall: 4}, 7: {name: 'five'}, 8: {syscall: 6}},
        {100: {syscall: 1}, 7: {name: 'five'}, 8: {syscall: 6}},
        {100: {syscall: 1}, 8: {syscall: 6}},
        {100: {syscall: 1}},
        {100: {syscall: 1}, 9: {syscall: 2}},
        {100: {syscall: 1}},
        {100: {syscall: 1}, 10: {name: 'one'}},
        {100: {syscall: 1}}]
    id_dict_old_results = [{} for _ in range(21)]
    min_allowed_time_diff = 0.1

    source = self.__class__.__name__
    log_atoms = []
    for line in self.log_lines:
        t = time.time()
        match_element = self.parsing_model.get_match_element('parser', MatchContext(line))
        log_atoms.append(LogAtom(line, ParserMatch(match_element), t, source))

    detector = NewMatchIdValueComboDetector(
        self.aminer_config, ['parser/type/path/name', 'parser/type/syscall/syscall'],
        [self.stream_printer_event_handler], id_path_list=['parser/type/path/id', 'parser/type/syscall/id'],
        min_allowed_time_diff=min_allowed_time_diff, auto_include_flag=False, allow_missing_values_flag=True,
        persistence_id='audit_type_path', output_log_line=False)
    self.analysis_context.register_component(detector, description)

    for i, log_atom in enumerate(log_atoms):
        self.assertTrue(detector.receive_atom(log_atom))
        stream_is_empty = self.output_stream.getvalue() == ""
        self.assertEqual(stream_is_empty, output_stream_empty_results[i], log_atom.raw_data)
        self.assertEqual(detector.id_dict_current, id_dict_current_results[i])
        self.assertEqual(detector.id_dict_old, id_dict_old_results[i])
        # Without auto include nothing may be learned.
        self.assertEqual(detector.known_values, [])
        self.reset_output_stream()
def run_histogram_analysis(self, number_of_pathes, amplifier):
    """
    Benchmark HistogramAnalysis.receive_atom with a modulo time bin definition of number_of_pathes bins.

    The time spent on the random draw is excluded from the measurement window. Per-round atom counts and their
    average are appended to the class-level result string.
    """
    results = [None] * self.iterations
    for run in range(self.iterations):
        bin_definition = ModuloTimeBinDefinition(86400, 86400 / number_of_pathes, 0, 1, number_of_pathes, False)
        histogram_data = HistogramData('match/crontab', bin_definition)
        analysis = HistogramAnalysis(
            self.aminer_config, [(histogram_data.property_path, bin_definition)], amplifier * self.waiting_time,
            [self.stream_printer_event_handler], False, 'Default')
        processed = 0
        started = time.time()
        t = started
        while int(time.time() - started) < self.waiting_time:
            p = process_time()
            rand = random.randint(0, 100000)
            started = started + process_time() - p
            match_element = MatchElement('match/crontab', t + rand, t + rand, [])
            atom = LogAtom(histogram_data.bin_data, ParserMatch(match_element), t + processed, analysis)
            analysis.receive_atom(atom)
            processed += 1
        results[run] = processed
    avg = sum(results) / self.iterations
    description = '%d bin(s) and output after %d elements.' % (number_of_pathes, amplifier * self.waiting_time)
    type(self).result = self.result + self.result_string % (analysis.__class__.__name__, avg, results, description)
def run_timestamp_correction_filters(self, number_of_pathes):
    """
    Benchmark SimpleMonotonicTimestampAdjust.receive_atom in front of a NewMatchPathDetector.

    Atom timestamps are moved into the past by a random amount. The time spent on the random draw is excluded
    from the measurement window. Results are appended to the class-level result string.
    """
    results = [None] * self.iterations
    for run in range(self.iterations):
        detector = NewMatchPathDetector(self.aminer_config, [self.stream_printer_event_handler], 'Default', True)
        timestamp_adjust = SimpleMonotonicTimestampAdjust([detector])
        started = time.time()
        processed = 0
        while int(time.time() - started) < self.waiting_time:
            model_element = DecimalIntegerValueModelElement(
                'd' + str(processed % number_of_pathes), DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                DecimalIntegerValueModelElement.PAD_TYPE_NONE)
            p = process_time()
            offset = random.randint(1, 1000000)
            started = started + process_time() - p
            context = MatchContext(str(processed).encode())
            match_element = model_element.get_match_element('integer', context)
            atom = LogAtom(match_element.match_string, ParserMatch(match_element), started - offset, timestamp_adjust)
            timestamp_adjust.receive_atom(atom)
            processed += 1
        results[run] = processed
    avg = sum(results) / self.iterations
    description = 'a %s and %d different path(es).' % (detector.__class__.__name__, number_of_pathes)
    type(self).result = self.result + self.result_string % (
        timestamp_adjust.__class__.__name__, avg, results, description)
def run_time_correlation_violation_detector(self, chance):
    """
    Benchmark TimeCorrelationViolationDetector.receive_atom with one correlation rule between two paths.

    Each pass of the timed loop sends an atom for '/integer/d0' followed by one for '/integer/d1'; when the random
    draw divided by 100 reaches `chance`, a further '/integer/d0' atom is sent. Per-round loop counts and their
    average are appended to the class-level result string.
    """
    results = [None] * self.iterations
    avg = 0
    z = 0
    while z < self.iterations:
        # The same rule is attached as A-class rule to the first selector and as B-class rule to the second.
        correlation_rule = CorrelationRule('Correlation', 0, chance,
                                           max_artefacts_a_for_single_b=1,
                                           artefact_match_parameters=[
                                               ('/integer/d0', '/integer/d1')
                                           ])
        a_class_selector = EventClassSelector('Selector1', [correlation_rule], None)
        b_class_selector = EventClassSelector('Selector2', None, [correlation_rule])
        rules = [
            Rules.PathExistsMatchRule('/integer/d0', a_class_selector),
            Rules.PathExistsMatchRule('/integer/d1', b_class_selector)
        ]
        time_correlation_violation_detector = TimeCorrelationViolationDetector(
            self.analysis_context.aminer_config, rules,
            [self.stream_printer_event_handler])
        seconds = time.time()
        # s is the simulated log timestamp; seconds is the start of the measurement window.
        s = seconds
        i = 0
        decimal_integer_value_me = DecimalIntegerValueModelElement(
            'd0', DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
            DecimalIntegerValueModelElement.PAD_TYPE_NONE)
        while int(time.time() - seconds) < self.waiting_time:
            integer = '/integer'
            p = process_time()
            r = random.randint(1, 100)
            # Shift the window start so the random draw does not count against the measured time.
            seconds = seconds + process_time() - p
            decimal_integer_value_me1 = DecimalIntegerValueModelElement(
                'd1', DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                DecimalIntegerValueModelElement.PAD_TYPE_NONE)
            # Atom for path '/integer/d0' at time s.
            match_context = MatchContext(str(i).encode())
            match_element = decimal_integer_value_me.get_match_element(
                integer, match_context)
            log_atom = LogAtom(match_element.match_string,
                               ParserMatch(match_element), s,
                               time_correlation_violation_detector)
            time_correlation_violation_detector.receive_atom(log_atom)
            # Atom for path '/integer/d1', r / 100 seconds later.
            match_context = MatchContext(str(i).encode())
            match_element = decimal_integer_value_me1.get_match_element(
                integer, match_context)
            log_atom = LogAtom(match_element.match_string,
                               ParserMatch(match_element), s + r / 100,
                               time_correlation_violation_detector)
            time_correlation_violation_detector.receive_atom(log_atom)
            s = s + r / 100
            # NOTE(review): the source formatting was collapsed; the extent of this branch (the timing
            # compensation inside, do_timer outside) is reconstructed - confirm against the original file.
            if r / 100 >= chance:
                p = process_time()
                match_context = MatchContext(str(i).encode())
                match_element = decimal_integer_value_me.get_match_element(
                    integer, match_context)
                log_atom = LogAtom(match_element.match_string,
                                   ParserMatch(match_element), s,
                                   time_correlation_violation_detector)
                time_correlation_violation_detector.receive_atom(log_atom)
                seconds = seconds + process_time() - p
            time_correlation_violation_detector.do_timer(s)
            i = i + 1
        results[z] = i
        z = z + 1
        avg = avg + i
    avg = avg / self.iterations
    type(self).result = self.result + self.result_string % (
        time_correlation_violation_detector.__class__.__name__, avg, results,
        '%d%% chance of not finding an element' % ((1 - chance) * 100))
def test6receive_atom(self):
    """
    This unittest checks that receive_atom assigns the expected variable types and that the update steps change them as expected.

    For every (num_init, num_update) pair the transitions static -> static -> uni -> others -> d, d -> others (with and without
    new values and with different num_d_bt), static -> asc -> desc, static -> nor -> beta1 and static -> unq are checked.
    """
    origin = self.__class__.__name__
    static_raw = b'True'

    def unpickle(path):
        """Load one pickled list of sample values."""
        with open(path, 'rb') as handle:
            return pickle.load(handle)  # skipcq: BAN-B301

    def fresh_detectors(num_init, num_update, **options):
        """Create a new EventTypeDetector together with a VariableTypeDetector that is attached to it."""
        event_detector = EventTypeDetector(self.aminer_config, [self.stream_printer_event_handler])
        variable_detector = VariableTypeDetector(
            self.aminer_config, [self.stream_printer_event_handler], event_detector, num_init=num_init, num_update=num_update,
            **options)
        return event_detector, variable_detector

    def encoded(number):
        """Return the utf-8 encoded string representation of number."""
        return bytes(str(number), 'utf-8')

    def bytes_atom(raw, timestamp):
        """Build a LogAtom from a bytestring; the match string is the decoded value, the match object the raw bytes."""
        return LogAtom(raw, ParserMatch(MatchElement('', raw.decode(), raw, None)), timestamp, origin)

    def sample_atom(sample, timestamp):
        """Build a LogAtom from a loaded sample; the match string is the sample itself, the match object its str()."""
        return LogAtom(sample, ParserMatch(MatchElement('', sample, str(sample), None)), timestamp, origin)

    def feed(event_detector, variable_detector, atoms):
        """Pass every atom to the EventTypeDetector first and then to the VariableTypeDetector."""
        for atom in atoms:
            self.assertTrue(event_detector.receive_atom(atom))
            variable_detector.receive_atom(atom)

    # load data
    uni_data_list = unpickle('unit/data/vtd_data/uni_data_test6')
    nor_data_list = unpickle('unit/data/vtd_data/nor_data_test6')
    beta1_data_list = unpickle('unit/data/vtd_data/beta1_data_test6')
    uni_data_list = uni_data_list * 10
    nor_data_list = nor_data_list * 10
    beta1_data_list = beta1_data_list * 10

    for init, update in [(100, 50), (110, 55), (90, 45), (80, 40), (70, 35)]:
        etd, vtd = fresh_detectors(init, update, div_thres=0.8, sim_thres=0.3, num_pause_others=0)
        t = time.time()
        static_atom = bytes_atom(static_raw, t)

        # initialize data
        feed(etd, vtd, (static_atom for _ in range(init)))
        result = vtd.var_type[0][0]
        self.assertEqual(['stat', [static_raw.decode()], True], result, (init, update, result))

        # static -> static
        feed(etd, vtd, (static_atom for _ in range(update)))
        result = vtd.var_type[0][0]
        self.assertEqual(['stat', [static_raw.decode()], True], result, (init, update, result))

        # static -> uni
        feed(etd, vtd, (sample_atom(sample, t) for sample in uni_data_list[:init]))
        result = vtd.var_type[0][0]
        pos_distr = vtd.alternative_distribution_types[0][0]
        self.assertTrue(result[0] == 'uni' or 'uni' in [distr[0] for distr in pos_distr], (init, update, result))

        # uni -> others
        feed(etd, vtd, (bytes_atom(encoded((k % 75) * 0.1), t) for k in range(update)))
        result = vtd.var_type[0][0]
        self.assertEqual(['others', 0], result, (init, update, result))

        # others -> d
        feed(etd, vtd, (bytes_atom(encoded((k % 10) * 0.1), t) for k in range(update)))
        result = vtd.var_type[0][0]
        self.assertEqual('d', result[0], (init, update, result))

        # reset all
        etd, vtd = fresh_detectors(init, update, div_thres=0.3, sim_thres=0.5, num_pause_others=0, num_d_bt=30)

        # initialize with d
        feed(etd, vtd, (bytes_atom(encoded((k % 10) * 0.1), t) for k in range(init)))
        result = vtd.var_type[0][0]
        self.assertEqual('d', result[0], (init, update, result))

        # discrete to others with new values
        feed(etd, vtd, (sample_atom(sample, t) for sample in uni_data_list[:init]))
        result = vtd.var_type[0][0]
        self.assertEqual(['others', 0], result, (init, update, result))

        # reset all
        etd, vtd = fresh_detectors(init, update, div_thres=0.3, sim_thres=0.5, num_pause_others=0, num_d_bt=20)

        # initialize with d
        feed(etd, vtd, (bytes_atom(encoded((k % 10) * 0.1), t) for k in range(init)))
        result = vtd.var_type[0][0]
        self.assertEqual('d', result[0], (init, update, result))

        # discrete to others without new values, low num_d_bt
        feed(etd, vtd, (bytes_atom(encoded((k % 3) * 0.1), t) for k in range(update)))
        result = vtd.var_type[0][0]
        self.assertEqual(['others', 0], result, (init, update, result))

        # reset all
        etd, vtd = fresh_detectors(init, update, div_thres=0.3, sim_thres=0.5, num_pause_others=0, num_d_bt=100)

        # initialize with d
        feed(etd, vtd, (bytes_atom(encoded((k % 10) * 0.1), t) for k in range(init)))
        result = vtd.var_type[0][0]
        self.assertEqual('d', result[0], (init, update, result))

        # discrete to others without new values, high num_d_bt
        feed(etd, vtd, (bytes_atom(encoded((k % 3) * 0.1), t) for k in range(update)))
        result = vtd.var_type[0][0]
        self.assertNotEqual(['others', 0], result, (init, update, result))

        # reset all
        etd, vtd = fresh_detectors(init, update, div_thres=0.3, sim_thres=0.3, num_pause_others=0)
        t = time.time()
        static_atom = bytes_atom(static_raw, t)

        # initialize data
        feed(etd, vtd, (static_atom for _ in range(init)))
        result = vtd.var_type[0][0]
        self.assertEqual(['stat', [static_raw.decode()], True], result, (init, update, result))

        # static -> asc
        feed(etd, vtd, (bytes_atom(encoded(k * 0.1), t) for k in range(init)))
        result = vtd.var_type[0][0]
        self.assertEqual(['asc', 'float'], result, (init, update, result))

        # asc -> desc
        feed(etd, vtd, (bytes_atom(encoded(k * 0.1), t) for k in range(init, 0, -1)))
        result = vtd.var_type[0][0]
        self.assertEqual(['desc', 'float'], result, (init, update, result))

        # reset all
        etd, vtd = fresh_detectors(init, update, div_thres=0.3, sim_thres=0.3, num_pause_others=0)
        t = time.time()
        static_atom = bytes_atom(static_raw, t)

        # initialize data
        feed(etd, vtd, (static_atom for _ in range(init)))
        result = vtd.var_type[0][0]
        self.assertEqual(['stat', [static_raw.decode()], True], result, (init, update, result))

        # static -> nor
        feed(etd, vtd, (sample_atom(sample, t) for sample in nor_data_list[:init]))
        result = vtd.var_type[0][0]
        pos_distr = vtd.alternative_distribution_types[0][0]
        self.assertTrue(result[0] == 'nor' or 'nor' in [distr[0] for distr in pos_distr], (init, update, result))

        # nor -> beta1
        feed(etd, vtd, (sample_atom(sample, t) for sample in beta1_data_list[:init]))
        result = vtd.var_type[0][0]
        pos_distr = vtd.alternative_distribution_types[0][0]
        self.assertTrue(
            (result[0] == 'beta' and result[-1] == 1) or 'beta1' in [distr[0] + str(distr[-1]) for distr in pos_distr],
            (init, update, result))

        # reset all
        etd, vtd = fresh_detectors(init, update, div_thres=0.3, sim_thres=0.3, num_pause_others=0)
        t = time.time()
        static_atom = bytes_atom(static_raw, t)

        # initialize data
        feed(etd, vtd, (static_atom for _ in range(init)))
        result = vtd.var_type[0][0]
        self.assertEqual(['stat', [static_raw.decode()], True], result, (init, update, result))

        # static -> unq
        vtd.test_ks_int = False
        unq_data_list = [encoded(k) for k in range(init)]
        random.shuffle(unq_data_list)
        # here match string and match object are both the raw bytestring.
        feed(etd, vtd, (
            LogAtom(unq_data, ParserMatch(MatchElement('', unq_data, unq_data, None)), t, origin) for unq_data in unq_data_list))
        result = vtd.var_type[0][0]
        self.assertEqual('unq', result[0], (init, update, result))
def test4detect_var_type(self):
    """
    This unittest tests possible scenarios of the detect_var_type method.

    The checked results are: static values, ascending and descending sequences of float and integer values, the uniform
    distribution, unique values, 'others' and discrete values.
    """
    num_init = 100
    t = time.time()

    def new_etd():
        """Create an EventTypeDetector without any stored values."""
        return EventTypeDetector(self.aminer_config, [self.stream_printer_event_handler])

    def new_vtd(etd, **options):
        """Create a VariableTypeDetector with num_init initial values that reads from the given EventTypeDetector."""
        return VariableTypeDetector(self.aminer_config, [self.stream_printer_event_handler], etd, num_init=num_init, **options)

    def make_atom(raw):
        """Build a LogAtom from a bytestring; the match string is the decoded value, the match object the raw bytes."""
        return LogAtom(raw, ParserMatch(MatchElement('', raw.decode(), raw, None)), t, self.__class__.__name__)

    # test the 'static' path of detect_var_type
    etd = new_etd()
    vtd = new_vtd(etd)
    stat_data = b'5.3.0-55-generic'
    log_atom = make_atom(stat_data)
    # check what happens if less than numMinAppearance values are available
    for _ in range(num_init):
        self.assertTrue(etd.receive_atom(log_atom))
    result = vtd.detect_var_type(0, 0)
    self.assertEqual(['stat', [stat_data.decode()], False], result)

    # ascending / descending sequences with float and integer values; etd and vtd are reset for every case for clear results.
    monotone_cases = [
        ([i * 0.1 for i in range(num_init)], ['asc', 'float']),
        (list(range(num_init)), ['asc', 'int']),
        ([i * 0.1 for i in range(num_init, 0, -1)], ['desc', 'float']),
        (list(range(num_init, 0, -1)), ['desc', 'int']),
    ]
    for numbers, expected in monotone_cases:
        etd = new_etd()
        vtd = new_vtd(etd)
        for number in numbers:
            self.assertTrue(etd.receive_atom(make_atom(bytes(str(number), 'utf-8'))))
        result = vtd.detect_var_type(0, 0)
        self.assertEqual(expected, result)

    # reset etd and vtd for clear results.
    etd = new_etd()
    vtd = new_vtd(etd, div_thres=0.3, test_ks_int=True)

    # test 'num_init' and 'div_thres'
    # prevent results from becoming asc or desc
    stat_data = bytes(str(99), 'utf-8')
    etd.receive_atom(make_atom(stat_data))
    values = [float(stat_data)]
    for i in range(99):
        stat_data = bytes(str(i), 'utf-8')
        values.append(float(stat_data))
        self.assertTrue(etd.receive_atom(make_atom(stat_data)))
    result = vtd.detect_var_type(0, 0)
    # this means that the uniformal distribution must be detected.
    # assertTrue is required here: comparing the boolean expression with the result list via assertNotEqual can never fail.
    self.assertTrue(result[0] == 'uni' or 'uni' in [distr[0] for distr in result[-1]], result)

    # test 'divThres' option for the continuous distribution
    vtd = new_vtd(etd, div_thres=1.0, test_ks_int=True)
    result = vtd.detect_var_type(0, 0)
    self.assertEqual(['unq', values], result)

    # test 'testInt' option for the continuous distribution
    vtd = new_vtd(etd, div_thres=0.3, test_ks_int=False)
    result = vtd.detect_var_type(0, 0)
    self.assertEqual(['unq', values], result)

    # test 'simThres' option to result in 'others'
    # the EventTypeDetector is intentionally not reset from here on, only the VariableTypeDetector is replaced.
    vtd = new_vtd(etd, div_thres=0.5, test_ks_int=False, sim_thres=0.5)
    values = []
    for i in range(100):
        stat_data = bytes(str((i % 50) * 0.1), 'utf-8')
        values.append(float(stat_data))
        self.assertTrue(etd.receive_atom(make_atom(stat_data)))
    result = vtd.detect_var_type(0, 0)
    # at least (1 - 'simThresh') * 'numMinAppearance' and maximal 'numMinAppearance' * 'divThres' - 1 unique values must exist.
    self.assertEqual(['others', 0], result)

    # test discrete result
    vtd = new_vtd(etd, div_thres=0.5, test_ks_int=False, sim_thres=0.3)
    values = []
    for i in range(num_init):
        stat_data = bytes(str((i % 50) * 0.1), 'utf-8')
        values.append(float(stat_data))
        self.assertTrue(etd.receive_atom(make_atom(stat_data)))
    result = vtd.detect_var_type(0, 0)
    # expected result: the unique values and their relative frequencies.
    values_set = list(set(values))
    values_app = [0 for _ in range(len(values_set))]
    for value in values:
        values_app[values_set.index(value)] += 1
    values_app = [x / len(values) for x in values_app]
    self.assertEqual(['d', values_set, values_app, len(values)], result)
def run_new_match_id_value_combo_detector(self, min_allowed_time_diff):
    """
    Run the performance measurement for the NewMatchIdValueComboDetector.

    For each of the self.iterations runs a fresh detector is created and fed with randomly chosen audit log lines until
    self.waiting_time seconds have passed. The number of processed lines per run and their average are appended to the class
    attribute result.
    @param min_allowed_time_diff passed to the detector as min_allowed_time_diff and printed in the result description.
    """
    audit_lines = [
        b'type=SYSCALL msg=audit(1580367384.000:1): arch=c000003e syscall=1 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
        b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
        b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367385.000:1): item=0 name="one" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 '
        b'rdev=00:00 nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367386.000:2): arch=c000003e syscall=2 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
        b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
        b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367387.000:2): item=0 name="two" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367388.000:3): arch=c000003e syscall=3 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
        b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
        b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367389.000:3): item=0 name="three" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00'
        b' nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367388.500:100): arch=c000003e syscall=1 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
        b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
        b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=SYSCALL msg=audit(1580367390.000:4): arch=c000003e syscall=1 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
        b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
        b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367391.000:4): item=0 name="one" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=PATH msg=audit(1580367392.000:5): item=0 name="two" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367393.000:5): arch=c000003e syscall=2 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
        b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
        b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=SYSCALL msg=audit(1580367394.000:6): arch=c000003e syscall=4 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
        b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
        b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367395.000:7): item=0 name="five" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367396.000:8): arch=c000003e syscall=6 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
        b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
        b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367397.000:6): item=0 name="four" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367398.000:7): arch=c000003e syscall=5 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
        b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
        b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367399.000:8): item=0 name="six" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367400.000:9): arch=c000003e syscall=2 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
        b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
        b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367401.000:9): item=0 name="three" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 '
        b'rdev=00:00 nametype=NORMAL',
        b'type=PATH msg=audit(1580367402.000:10): item=0 name="one" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 '
        b'rdev=00:00 nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367403.000:10): arch=c000003e syscall=3 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 '
        b'a3=4f items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 '
        b'tty=(none) ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)'
    ]

    # model for the 'type=PATH' audit lines.
    path_model = SequenceModelElement('path', [
        FixedDataModelElement('type', b'type=PATH '),
        FixedDataModelElement('msg_audit', b'msg=audit('),
        DelimitedDataModelElement('msg', b':'),
        FixedDataModelElement('placeholder', b':'),
        DecimalIntegerValueModelElement('id'),
        FixedDataModelElement('item_string', b'): item='),
        DecimalIntegerValueModelElement('item'),
        FixedDataModelElement('name_string', b' name="'),
        DelimitedDataModelElement('name', b'"'),
        FixedDataModelElement('inode_string', b'" inode='),
        DecimalIntegerValueModelElement('inode'),
        FixedDataModelElement('dev_string', b' dev='),
        DelimitedDataModelElement('dev', b' '),
        FixedDataModelElement('mode_string', b' mode='),
        DecimalIntegerValueModelElement('mode'),
        FixedDataModelElement('ouid_string', b' ouid='),
        DecimalIntegerValueModelElement('ouid'),
        FixedDataModelElement('ogid_string', b' ogid='),
        DecimalIntegerValueModelElement('ogid'),
        FixedDataModelElement('rdev_string', b' rdev='),
        DelimitedDataModelElement('rdev', b' '),
        FixedDataModelElement('nametype_string', b' nametype='),
        FixedWordlistDataModelElement('nametype', [b'NORMAL', b'ERROR'])
    ])
    # model for the 'type=SYSCALL' audit lines; everything after the exit value is consumed as one element.
    syscall_model = SequenceModelElement('syscall', [
        FixedDataModelElement('type', b'type=SYSCALL '),
        FixedDataModelElement('msg_audit', b'msg=audit('),
        DelimitedDataModelElement('msg', b':'),
        FixedDataModelElement('placeholder', b':'),
        DecimalIntegerValueModelElement('id'),
        FixedDataModelElement('arch_string', b'): arch='),
        DelimitedDataModelElement('arch', b' '),
        FixedDataModelElement('syscall_string', b' syscall='),
        DecimalIntegerValueModelElement('syscall'),
        FixedDataModelElement('success_string', b' success='),
        FixedWordlistDataModelElement('success', [b'yes', b'no']),
        FixedDataModelElement('exit_string', b' exit='),
        DecimalIntegerValueModelElement('exit'),
        AnyByteDataModelElement('remainding_data')
    ])
    audit_model = FirstMatchModelElement('type', [path_model, syscall_model])

    counts = []
    for _ in range(self.iterations):
        detector = NewMatchIdValueComboDetector(
            self.aminer_config, ['parser/type/path/name', 'parser/type/syscall/syscall'], [self.stream_printer_event_handler],
            id_path_list=['parser/type/path/id', 'parser/type/syscall/id'], min_allowed_time_diff=min_allowed_time_diff,
            auto_include_flag=False, allow_missing_values_flag=True, persistence_id='audit_type_path', output_log_line=False)
        atom_time = time.time()
        started = time.time()
        processed = 0
        while int(time.time() - started) < self.waiting_time:
            # the time spent on drawing the random index is added to the start time, so it does not count as runtime.
            tick = process_time()
            index = random.randint(0, len(audit_lines) - 1)
            started = started + process_time() - tick

            # this code just creates some data to be able to compare with other analysis components.
            dummy_model = DecimalIntegerValueModelElement(
                'd', DecimalIntegerValueModelElement.SIGN_TYPE_NONE, DecimalIntegerValueModelElement.PAD_TYPE_NONE)
            _dummy_element = dummy_model.get_match_element('integer', MatchContext(str(processed % 100).encode()))
            ########################################################################################

            chosen = audit_lines[index]
            parsed = audit_model.get_match_element('parser', MatchContext(chosen))
            detector.receive_atom(LogAtom(chosen, ParserMatch(parsed), atom_time, self.__class__.__name__))
            processed = processed + 1
        counts.append(processed)

    average = sum(counts) / self.iterations
    type(self).result = self.result + self.result_string % (
        detector.__class__.__name__, average, counts, '%.2f seconds min_allowed_time_diff.' % min_allowed_time_diff)