    def test3_no_unique_persistence_id(self):
        """Check if a warning is printed if the same persistence_id is used for the same component type."""
        old_stderr = sys.stderr
        new_stderr = io.StringIO()
        sys.stderr = new_stderr
        PersistenceUtil.SKIP_PERSISTENCE_ID_WARNING = False

        NewMatchPathDetector(self.aminer_config,
                             [self.stream_printer_event_handler], 'Default',
                             True)
        NewMatchPathValueComboDetector(self.aminer_config, ['first/f1/s1'],
                                       [self.stream_printer_event_handler],
                                       'Default', False, True)
        self.assertEqual('', new_stderr.getvalue())

        NewMatchPathDetector(self.aminer_config,
                             [self.stream_printer_event_handler], 'Default',
                             True)
        self.assertEqual(
            'Warning: Detectors of type NewMatchPathDetector use the persistence_id "Default" multiple times. Please assign a'
            ' unique persistence_id for every component.\n',
            new_stderr.getvalue())
        new_stderr.seek(0)
        new_stderr.truncate(0)

        NewMatchPathValueComboDetector(self.aminer_config, ['first/f1/s1'],
                                       [self.stream_printer_event_handler],
                                       'Default', False, True)
        self.assertEqual(
            'Warning: Detectors of type NewMatchPathValueComboDetector use the persistence_id "Default" multiple times. Please'
            ' assign a unique persistence_id for every component.\n',
            new_stderr.getvalue())
        sys.stderr = old_stderr
    def test2persist_multiple_objects_of_multiple_class(self):
        """In this test case multiple instances of multiple classes are to be persisted and loaded."""
        description = "Test2PersistencyUtil"
        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            'Default2', True)
        self.analysis_context.register_component(new_match_path_detector,
                                                 description)

        t = time.time()
        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data,
                                     ParserMatch(self.match_element_fixed_dme),
                                     t, new_match_path_detector)
        log_atom_decimal_integer_value_me = LogAtom(
            self.match_context_decimal_integer_value_me.match_data,
            ParserMatch(self.match_element_decimal_integer_value_me), t,
            new_match_path_detector)
        new_match_path_detector.receive_atom(log_atom_fixed_dme)
        new_match_path_detector.receive_atom(log_atom_decimal_integer_value_me)

        other_new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            'otherDetector2', True)
        self.analysis_context.register_component(other_new_match_path_detector,
                                                 description + "2")
        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data,
                                     ParserMatch(self.match_element_fixed_dme),
                                     t, other_new_match_path_detector)
        other_new_match_path_detector.receive_atom(log_atom_fixed_dme)

        new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
            self.aminer_config, ['first/f1/s1'],
            [self.stream_printer_event_handler], 'Default', False, True)
        self.analysis_context.register_component(
            new_match_path_value_combo_detector, description + "3")
        log_atom_sequence_me = LogAtom(
            self.fixed_dme.fixed_data,
            ParserMatch(self.match_element_first_match_me), t,
            new_match_path_value_combo_detector)
        new_match_path_value_combo_detector.receive_atom(log_atom_sequence_me)

        PersistencyUtil.persist_all()
        self.assertTrue(
            PersistencyUtil.load_json(
                new_match_path_detector.persistence_file_name) == [
                    self.match_element_fixed_dme.get_path(),
                    self.match_element_decimal_integer_value_me.get_path()
                ] or PersistencyUtil.load_json(
                    new_match_path_detector.persistence_file_name) == [
                        self.match_element_decimal_integer_value_me.get_path(),
                        self.match_element_fixed_dme.get_path()
                    ])
        self.assertEqual(
            PersistencyUtil.load_json(
                other_new_match_path_detector.persistence_file_name),
            [self.match_element_fixed_dme.get_path()])
        self.assertEqual(
            PersistencyUtil.load_json(
                new_match_path_value_combo_detector.persistence_file_name),
            ([[log_atom_sequence_me.raw_data]]))

# Example 3
    def test3unsorted_log_atom(self):
        """In this test case multiple, UNSORTED LogAtoms of different sources are received by the class."""
        description = "Test3SimpleMultisourceAtomSync"
        sync_wait_time = 3

        any_byte_data_model_element = AnyByteDataModelElement('a1')
        new_match_path_detector1 = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            'Default',
            False,
            output_log_line=False)
        self.analysis_context.register_component(new_match_path_detector1,
                                                 description)
        new_match_path_detector2 = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            'Default',
            False,
            output_log_line=False)
        self.analysis_context.register_component(new_match_path_detector2,
                                                 description + "2")

        simple_multisource_atom_sync = SimpleMultisourceAtomSync(
            [new_match_path_detector1, new_match_path_detector2],
            sync_wait_time)
        t = time()
        match_context = MatchContext(self.calculation)
        match_element = any_byte_data_model_element.get_match_element(
            'match', match_context)
        log_atom1 = LogAtom(match_element.match_object,
                            ParserMatch(match_element), t,
                            new_match_path_detector1)
        log_atom2 = LogAtom(match_element.match_object,
                            ParserMatch(match_element), t - 1,
                            new_match_path_detector1)

        self.assertTrue(
            not simple_multisource_atom_sync.receive_atom(log_atom1))
        sleep(sync_wait_time)

        # unsorted, should be accepted
        self.reset_output_stream()
        self.assertTrue(simple_multisource_atom_sync.receive_atom(log_atom2))
        self.assertTrue(simple_multisource_atom_sync.receive_atom(log_atom1))
        self.assertEqual(
            self.output_stream.getvalue(), self.__expected_string %
            (datetime.fromtimestamp(t - 1).strftime(
                self.datetime_format_string),
             new_match_path_detector1.__class__.__name__, description, 1,
             self.match_path, self.calculation) + self.__expected_string %
            (datetime.fromtimestamp(t - 1).strftime(
                self.datetime_format_string),
             new_match_path_detector1.__class__.__name__, description + "2", 1,
             self.match_path, self.calculation) + self.__expected_string %
            (datetime.fromtimestamp(t).strftime(self.datetime_format_string),
             new_match_path_detector1.__class__.__name__, description, 1,
             self.match_path, self.calculation) + self.__expected_string %
            (datetime.fromtimestamp(t).strftime(self.datetime_format_string),
             new_match_path_detector1.__class__.__name__, description + "2", 1,
             self.match_path, self.calculation))
    def test1simple_monotonic_timestamp_adjust_test(self):
        """this test case checks if the timestamp is adjusted and logAtoms are forwarded correctly."""
        description = "Test1TimestampCorrectionFilter"
        match_context_fixed_dme = MatchContext(b' pid=')
        fixed_dme = FixedDataModelElement('s1', b' pid=')
        match_element_fixed_dme = fixed_dme.get_match_element(
            "match", match_context_fixed_dme)
        t = time()

        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            'Default',
            False,
            output_log_line=False)
        self.analysis_context.register_component(new_match_path_detector,
                                                 description)
        simple_monotonic_timestamp_adjust = SimpleMonotonicTimestampAdjust(
            [new_match_path_detector], False)
        log_atom_fixed_dme = LogAtom(fixed_dme.fixed_data,
                                     ParserMatch(match_element_fixed_dme), t,
                                     new_match_path_detector)
        self.assertEqual(
            simple_monotonic_timestamp_adjust.receive_atom(log_atom_fixed_dme),
            True)
        self.assertEqual(
            self.output_stream.getvalue(), self.__expected_string %
            (datetime.fromtimestamp(t).strftime("%Y-%m-%d %H:%M:%S"),
             new_match_path_detector.__class__.__name__, description, 1,
             self.match_path))

# Example 5
    def test2_log_atom_known(self):
        """This test case checks the functionality of the autoIncludeFlag. If the same MatchElement is processed a second time and the
        auto_include_flag was True, no event must be triggered."""
        description = "Test2NewMatchPathDetector"
        new_match_path_detector = NewMatchPathDetector(self.aminer_config, [self.stream_printer_event_handler], 'Default', True,
                                                       output_log_line=False)
        self.analysis_context.register_component(new_match_path_detector, description)
        t = round(time.time(), 3)

        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data, ParserMatch(self.match_element_fixed_dme), t, new_match_path_detector)
        log_atom_decimal_integer_value_me = LogAtom(self.match_context_decimal_integer_value_me.match_data,
                                                    ParserMatch(self.match_element_decimal_integer_value_me), t, new_match_path_detector)

        self.assertTrue(new_match_path_detector.receive_atom(log_atom_fixed_dme))
        self.assertEqual(self.output_stream.getvalue(), self.__expected_string % (
            datetime.fromtimestamp(t).strftime(self.datetime_format_string), new_match_path_detector.__class__.__name__, description, 1,
            self.match_path_s1, self.pid))
        self.reset_output_stream()

        # repeating should NOT produce the same result
        self.assertTrue(new_match_path_detector.receive_atom(log_atom_fixed_dme))
        self.assertEqual(self.output_stream.getvalue(), '')
        self.reset_output_stream()

        # other MatchElement
        self.assertTrue(new_match_path_detector.receive_atom(log_atom_decimal_integer_value_me))
        self.assertEqual(self.output_stream.getvalue(), self.__expected_string % (
            datetime.fromtimestamp(t).strftime(self.datetime_format_string), new_match_path_detector.__class__.__name__, description, 1,
            self.match_path_d1, self.uid))

# Example 6
    def test1_log_atom_not_known(self):
        """This test case checks the correct processing of unknown log lines, which in reality means that an anomaly has been found. The
        output is directed to an output stream and compared for accuracy. The auto_include_flag is False and the output must be repeatable
        on a second run."""
        description = "Test1NewMatchPathDetector"
        new_match_path_detector = NewMatchPathDetector(self.aminer_config, [self.stream_printer_event_handler], 'Default', False,
                                                       output_log_line=False)
        self.analysis_context.register_component(new_match_path_detector, description)
        t = round(time.time(), 3)
        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data, ParserMatch(self.match_element_fixed_dme), t, new_match_path_detector)
        log_atom_decimal_integer_value_me = LogAtom(self.match_context_decimal_integer_value_me.match_data,
                                                    ParserMatch(self.match_element_decimal_integer_value_me), t, new_match_path_detector)

        self.assertTrue(new_match_path_detector.receive_atom(log_atom_fixed_dme))
        self.assertEqual(self.output_stream.getvalue(), self.__expected_string % (
            datetime.fromtimestamp(t).strftime(self.datetime_format_string), new_match_path_detector.__class__.__name__, description, 1,
            self.match_path_s1, self.pid))
        self.reset_output_stream()

        # repeating should produce the same result
        self.assertTrue(new_match_path_detector.receive_atom(log_atom_fixed_dme))
        self.assertEqual(self.output_stream.getvalue(), self.__expected_string % (
            datetime.fromtimestamp(t).strftime(self.datetime_format_string), new_match_path_detector.__class__.__name__, description, 1,
            self.match_path_s1, self.pid))
        self.reset_output_stream()

        # other MatchElement
        self.assertTrue(new_match_path_detector.receive_atom(log_atom_decimal_integer_value_me))
        self.assertEqual(self.output_stream.getvalue(), self.__expected_string % (
            datetime.fromtimestamp(t).strftime(self.datetime_format_string), new_match_path_detector.__class__.__name__, description, 1,
            self.match_path_d1, self.uid))

# Example 7
    def test2no_timestamp_log_atom(self):
        """In this test case a LogAtom with no timestamp is received by the class."""
        description = "Test2SimpleMultisourceAtomSync"
        sync_wait_time = 3

        any_byte_data_model_element = AnyByteDataModelElement('a1')
        new_match_path_detector1 = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            'Default',
            False,
            output_log_line=False)
        self.analysis_context.register_component(new_match_path_detector1,
                                                 description)
        simple_multisource_atom_sync = SimpleMultisourceAtomSync(
            [new_match_path_detector1], sync_wait_time)
        t = time()

        match_context = MatchContext(self.calculation)
        match_element = any_byte_data_model_element.get_match_element(
            'match', match_context)
        log_atom1 = LogAtom(match_element.match_object,
                            ParserMatch(match_element), None,
                            new_match_path_detector1)

        self.assertTrue(simple_multisource_atom_sync.receive_atom(log_atom1))
        self.assertEqual(
            self.output_stream.getvalue(), self.__expected_string %
            (datetime.fromtimestamp(t).strftime(self.datetime_format_string),
             new_match_path_detector1.__class__.__name__, description, 1,
             self.match_path, self.calculation))
    def test3timestamp_higher_than_last_timestamp(self):
        """This test case checks if nothing happens, when the timestamp is, as expected, higher than the last one."""
        description = "Test3TimestampsUnsortedDetector"
        match_context_fixed_dme = MatchContext(self.pid)
        fixed_dme = FixedDataModelElement('s1', self.pid)
        match_element_fixed_dme = fixed_dme.get_match_element(
            "match", match_context_fixed_dme)
        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler], 'Default',
            False)
        self.analysis_context.register_component(new_match_path_detector,
                                                 description)

        t = time()
        log_atom = LogAtom(fixed_dme.fixed_data,
                           ParserMatch(match_element_fixed_dme), t,
                           new_match_path_detector)
        timestamp_unsorted_detector = TimestampsUnsortedDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            False,
            output_log_line=False)
        self.analysis_context.register_component(timestamp_unsorted_detector,
                                                 description + "2")
        self.assertTrue(timestamp_unsorted_detector.receive_atom(log_atom))
        self.assertEqual(self.output_stream.getvalue(), '')

        log_atom.set_timestamp(t)
        self.assertTrue(timestamp_unsorted_detector.receive_atom(log_atom))
        self.assertEqual(self.output_stream.getvalue(), '')

        log_atom.set_timestamp(t + 10000)
        self.assertTrue(timestamp_unsorted_detector.receive_atom(log_atom))
        self.assertEqual(self.output_stream.getvalue(), '')

# Example 9
    def test10_allowlist_event_with_known_and_unknown_paths(self):
        """This test case checks in which cases an event is triggered and compares with expected results."""
        description = "Test10NewMatchPathDetector"
        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            'Default',
            True,
            output_log_line=False)
        self.analysis_context.register_component(new_match_path_detector,
                                                 description)
        t = round(time.time(), 3)
        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data,
                                     ParserMatch(self.match_element_fixed_dme),
                                     t, new_match_path_detector)
        new_match_path_detector.receive_atom(log_atom_fixed_dme)
        self.assertEqual(
            new_match_path_detector.allowlist_event(
                self.analysis % new_match_path_detector.__class__.__name__,
                self.match_element_fixed_dme.get_path(),
                None), 'Allowlisted path(es) %s in %s.' %
            (self.match_element_fixed_dme.get_path(),
             self.analysis % new_match_path_detector.__class__.__name__))

        new_match_path_detector.auto_include_flag = False
        self.assertEqual(
            new_match_path_detector.allowlist_event(
                self.analysis % new_match_path_detector.__class__.__name__,
                self.match_element_decimal_integer_value_me.get_path(),
                None), 'Allowlisted path(es) %s in %s.' %
            (self.match_element_decimal_integer_value_me.path,
             self.analysis % new_match_path_detector.__class__.__name__))
    def test1_no_list_or_no_atom_handler_list(self):
        """This test case verifies that exceptions are raised when using wrong parameters."""
        self.assertRaises(
            Exception, SubhandlerFilter,
            NewMatchPathDetector(self.aminer_config, [], 'Default', True),
            False)
        self.assertRaises(Exception, SubhandlerFilter,
                          FixedDataModelElement('fixed', b'gesuchter String'),
                          False)
    def test1persist_multiple_objects_of_single_class(self):
        """In this test case multiple instances of one class are to be persisted and loaded."""
        description = "Test1PersistenceUtil"
        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler], 'Default',
            True)
        self.analysis_context.register_component(new_match_path_detector,
                                                 description)

        t = time.time()
        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data,
                                     ParserMatch(self.match_element_fixed_dme),
                                     t, new_match_path_detector)
        log_atom_decimal_integer_value_me = LogAtom(
            self.match_context_decimal_integer_value_me.match_data,
            ParserMatch(self.match_element_decimal_integer_value_me), t,
            new_match_path_detector)
        new_match_path_detector.receive_atom(log_atom_fixed_dme)
        new_match_path_detector.receive_atom(log_atom_decimal_integer_value_me)

        other_new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            'otherDetector', True)
        self.analysis_context.register_component(other_new_match_path_detector,
                                                 description + "2")
        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data,
                                     ParserMatch(self.match_element_fixed_dme),
                                     t, other_new_match_path_detector)
        other_new_match_path_detector.receive_atom(log_atom_fixed_dme)

        PersistenceUtil.persist_all()
        persistence_data = PersistenceUtil.load_json(
            new_match_path_detector.persistence_file_name)
        self.assertTrue(persistence_data in ([
            self.match_element_fixed_dme.get_path(),
            self.match_element_decimal_integer_value_me.get_path()
        ], [
            self.match_element_decimal_integer_value_me.get_path(),
            self.match_element_fixed_dme.get_path()
        ]))
        self.assertEqual(
            PersistenceUtil.load_json(
                other_new_match_path_detector.persistence_file_name),
            [self.match_element_fixed_dme.get_path()])

# Example 12
    def test2atom_filter_match_action(self):
        """This test case proves the functionality of the AtomFilters"""
        description = "Test2Rules"
        new_match_path_detector = NewMatchPathDetector(self.aminer_config, [self.stream_printer_event_handler], 'Default', False)
        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data, ParserMatch(self.match_element_fixed_dme), time(), new_match_path_detector)
        subhandler_filter = SubhandlerFilter([new_match_path_detector])
        self.analysis_context.register_component(subhandler_filter, description)
        self.analysis_context.register_component(new_match_path_detector, description + "2")

        self.assertTrue(subhandler_filter.receive_atom(log_atom_fixed_dme))

# Example 13
    def test9WhitelistEventWhitelistingDataException(self):
        """The NewMatchPathDetector cannot handle whitelisting data and therefore an exception is expected."""
        description = "Test9NewMatchPathDetector"
        new_match_path_detector = NewMatchPathDetector(self.aminer_config, [self.stream_printer_event_handler], 'Default', True,
                                                       output_log_line=False)
        self.analysis_context.register_component(new_match_path_detector, description)
        t = round(time.time(), 3)
        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data, ParserMatch(self.match_element_fixed_dme), t, new_match_path_detector)
        new_match_path_detector.receive_atom(log_atom_fixed_dme)
        self.assertRaises(Exception, new_match_path_detector.whitelist_event, self.analysis % new_match_path_detector.__class__.__name__,
                          log_atom_fixed_dme.raw_data, self.output_stream.getvalue(), ['random', 'Data'])

# Example 14
    def test4_get_time_trigger_class(self):
        """
        The known paths are to be periodically stored after a certain time. This requires a synchronization class.
        The return of the correct class is to be checked in this test case.
        """
        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            'Default',
            True,
            output_log_line=False)
        # 1 corresponds to AnalysisContext.TIME_TRIGGER_CLASS_REALTIME.
        self.assertEqual(new_match_path_detector.get_time_trigger_class(), 1)

# Example 15
def build_analysis_pipeline(analysis_context):
    """
    Define the function that creates the pipeline for parsing the log data.
    It also has to define an AtomizerFactory to instruct aminer how to process incoming data streams and create log atoms from them.
    """
    # Build the parsing model:
    from aminer.parsing.AnyByteDataModelElement import AnyByteDataModelElement

    parsing_model = AnyByteDataModelElement('AnyByteDataModelElement')

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filter = AtomFilters.SubhandlerFilter(None)

    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    stream_printer_event_handler = StreamPrinterEventHandler(analysis_context)
    anomaly_event_handlers = [stream_printer_event_handler]

    # Now define the AtomizerFactory using the model. A simple line
    # based one is usually sufficient.
    from aminer.input.SimpleByteStreamLineAtomizerFactory import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(
        parsing_model, [atom_filter], anomaly_event_handlers)

    # Just report all unparsed atoms to the event handlers.
    from aminer.input.SimpleUnparsedAtomHandler import SimpleUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(
        anomaly_event_handlers)
    atom_filter.add_handler(simple_unparsed_atom_handler,
                            stop_when_handled_flag=True)
    analysis_context.register_component(simple_unparsed_atom_handler,
                                        component_name="UnparsedHandler")

    from aminer.analysis.NewMatchPathDetector import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_detector,
                                        component_name="NewMatchPath")
    atom_filter.add_handler(new_match_path_detector)

    from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector
    new_match_path_value_detector = NewMatchPathValueDetector(
        analysis_context.aminer_config, ['/AnyByteDataModelElement'],
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_value_detector,
                                        component_name="NewMatchPathValue")
    atom_filter.add_handler(new_match_path_value_detector)
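
# The build_analysis_pipeline() function above is normally placed inside an aminer configuration file next to a
# config_properties dictionary; aminer imports that file and calls the function to assemble the analysis pipeline.
# A minimal sketch of the surrounding configuration is given below; the concrete file and directory paths are only
# illustrative assumptions and have to be adjusted to the local setup.
config_properties = {}
config_properties['LogResourceList'] = ['file:///var/log/syslog']  # log resources the aminer process should read
config_properties['Core.PersistenceDir'] = '/var/lib/aminer'  # directory where detectors persist their learned data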

# Example 16
    def test3_parser_match_is_other_element(self):
        """In this test case the ParserMatch actually is no instance of ParserMatch. The atom should still be considered to be parsed."""
        description = "Test3SimpleUnparsedAtomHandler"
        any_byte_data_model_element = AnyByteDataModelElement('a1')
        new_match_path_detector1 = NewMatchPathDetector(self.aminer_config, [self.stream_printer_event_handler], 'Default', False)

        match_context = MatchContext(self.calculation)
        match_element = any_byte_data_model_element.get_match_element('match', match_context)
        log_atom = LogAtom(match_element.match_object, any_byte_data_model_element, time(), new_match_path_detector1)

        simple_unparsed_atom_handler = SimpleUnparsedAtomHandler([self.stream_printer_event_handler])
        self.analysis_context.register_component(simple_unparsed_atom_handler, description)
        self.assertTrue(not simple_unparsed_atom_handler.receive_atom(log_atom))

# Example 17
    def test2_atom_is_parsed(self):
        """The atom in this test case has no ParserMatch."""
        description = "Test2SimpleUnparsedAtomHandler"
        any_byte_data_model_element = AnyByteDataModelElement('a1')
        new_match_path_detector1 = NewMatchPathDetector(self.aminer_config, [self.stream_printer_event_handler], 'Default', False)

        match_context = MatchContext(self.calculation)
        match_element = any_byte_data_model_element.get_match_element('match', match_context)
        log_atom = LogAtom(match_element.match_object, None, time(), new_match_path_detector1)

        simple_unparsed_atom_handler = SimpleUnparsedAtomHandler([self.stream_printer_event_handler])
        self.analysis_context.register_component(simple_unparsed_atom_handler, description)
        self.assertTrue(simple_unparsed_atom_handler.receive_atom(log_atom))

# Example 18
    def test3_log_atom_known_from_persisted_data(self):
        """The persisting and reading of permitted log lines should be checked with this test."""
        description = "Test3NewMatchPathDetector"
        new_match_path_detector = NewMatchPathDetector(self.aminer_config, [self.stream_printer_event_handler], 'Default', True,
                                                       output_log_line=False)
        self.analysis_context.register_component(new_match_path_detector, description)
        t = round(time.time(), 3)
        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data, ParserMatch(self.match_element_fixed_dme), t, new_match_path_detector)

        self.assertTrue(new_match_path_detector.receive_atom(log_atom_fixed_dme))
        self.assertEqual(self.output_stream.getvalue(), self.__expected_string % (
            datetime.fromtimestamp(t).strftime(self.datetime_format_string), new_match_path_detector.__class__.__name__, description, 1,
            self.match_path_s1, self.pid))
        new_match_path_detector.do_persist()
        self.reset_output_stream()

        other_new_match_path_detector = NewMatchPathDetector(self.aminer_config, [self.stream_printer_event_handler], 'Default', False,
                                                             output_log_line=False)
        other_log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data, ParserMatch(self.match_element_fixed_dme), t,
                                           other_new_match_path_detector)

        self.assertTrue(other_new_match_path_detector.receive_atom(other_log_atom_fixed_dme))
        self.assertEqual(self.output_stream.getvalue(), '')

# Example 19
    def test1get_atomizer(self):
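        """Check that the factory passes the parsing model, handlers and default settings on to the created atomizer."""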
        any_byte_data_model_element = AnyByteDataModelElement('a1')
        new_match_path_detector1 = NewMatchPathDetector(
            self.aminer_config, [], 'Default', False)
        new_match_path_detector2 = NewMatchPathDetector(
            self.aminer_config, [], 'Default', False)

        simple_byte_stream_line_atomizer_factory = SimpleByteStreamLineAtomizerFactory(
            any_byte_data_model_element,
            [new_match_path_detector1, new_match_path_detector2],
            [self.stream_printer_event_handler], None)

        byte_stream_line_atomizer = simple_byte_stream_line_atomizer_factory.get_atomizer_for_resource(
            None)
        self.assertEqual(byte_stream_line_atomizer.atom_handler_list,
                         [new_match_path_detector1, new_match_path_detector2])
        self.assertEqual(byte_stream_line_atomizer.event_handler_list,
                         [self.stream_printer_event_handler])
        self.assertEqual(byte_stream_line_atomizer.default_timestamp_paths, [])
        self.assertEqual(byte_stream_line_atomizer.parsing_model,
                         any_byte_data_model_element)
        self.assertEqual(byte_stream_line_atomizer.max_line_length, 65536)

# Example 20
    def test8_whitelist_event_type_exception(self):
        """This test case checks whether an exception is thrown when entering an event of another class."""
        description = "Test8NewMatchPathDetector"
        new_match_path_detector = NewMatchPathDetector(self.aminer_config, [self.stream_printer_event_handler], 'Default', True,
                                                       output_log_line=False)
        self.analysis_context.register_component(new_match_path_detector, description)
        t = round(time.time(), 3)
        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data, ParserMatch(self.match_element_fixed_dme), t, new_match_path_detector)
        new_match_path_detector.receive_atom(log_atom_fixed_dme)
        new_match_path_value_combo_detector = NewMatchPathValueComboDetector(self.aminer_config, [], [self.stream_printer_event_handler],
                                                                             'Default', True, True)
        self.assertRaises(
            Exception, new_match_path_detector.whitelist_event, self.analysis % new_match_path_value_combo_detector.__class__.__name__,
            log_atom_fixed_dme.raw_data, self.output_stream.getvalue(), None)
    def test3receive_atom_handled_by_more_handlers(self):
        """In this test case more than one handler can handle the log atom. The impact of the stop_when_handled flag is tested."""
        description = "Test3AtomFilters"
        other_description = "Test3OtherAtomFilters"
        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler], 'Default',
            False)
        self.analysis_context.register_component(new_match_path_detector,
                                                 description)
        t = time.time()

        other_new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler], 'Default',
            False)
        self.analysis_context.register_component(other_new_match_path_detector,
                                                 other_description)

        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data,
                                     ParserMatch(self.match_element_fixed_dme),
                                     t, new_match_path_detector)

        subhandler_filter = SubhandlerFilter(
            [new_match_path_detector, other_new_match_path_detector], False)
        self.assertTrue(subhandler_filter.receive_atom(log_atom_fixed_dme))
        result = self.output_stream.getvalue()
        self.reset_output_stream()

        new_match_path_detector.receive_atom(log_atom_fixed_dme)
        result_fixed_dme = self.output_stream.getvalue()
        self.reset_output_stream()

        other_new_match_path_detector.receive_atom(log_atom_fixed_dme)
        result_decimal_integer_value_me = self.output_stream.getvalue()

        self.assertEqual(result,
                         result_fixed_dme + result_decimal_integer_value_me)
    def test2receive_atom_unhandled(self):
        """In this test case no handler can handle the log atom."""
        description = "Test2AtomFilters"
        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler], 'Default',
            False)
        self.analysis_context.register_component(new_match_path_detector,
                                                 description)
        t = time.time()

        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data,
                                     ParserMatch(self.match_element_fixed_dme),
                                     t, new_match_path_detector)

        subhandler_filter = SubhandlerFilter([], True)
        self.assertTrue(not subhandler_filter.receive_atom(log_atom_fixed_dme))
    def test9match_value_filter_receive_atom_target_value_not_found(self):
        """No target_value was found in the dictionary."""
        description = "Test9AtomFilters"
        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler], 'Default',
            False)
        self.analysis_context.register_component(new_match_path_detector,
                                                 description)
        t = time.time()

        log_atom_fixed_dme = LogAtom(b'24999',
                                     ParserMatch(self.match_element_fixed_dme),
                                     t, new_match_path_detector)
        match_value_filter = MatchValueFilter(
            self.match_element_fixed_dme.get_path(),
            {self.fixed_dme.fixed_data: None}, None)
        self.assertTrue(
            not match_value_filter.receive_atom(log_atom_fixed_dme))
    def test4match_path_filter_receive_atom_path_in_dictionary(self):
        """There is a path in the dictionary and the handler is not None. The default_parsed_atom_handler is None."""
        description = "Test4AtomFilters"
        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler], 'Default',
            False)
        self.analysis_context.register_component(new_match_path_detector,
                                                 description)
        t = time.time()

        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data,
                                     ParserMatch(self.match_element_fixed_dme),
                                     t, new_match_path_detector)

        match_path_filter = MatchPathFilter([
            (self.match_element_fixed_dme.get_path(), new_match_path_detector)
        ], None)
        self.assertTrue(match_path_filter.receive_atom(log_atom_fixed_dme))
    def test6match_path_filter_receive_atom_path_not_in_dictionary_default_set(
            self):
        """The searched path is not in the dictionary. The default_parsed_atom_handler is set."""
        description = "Test6AtomFilters"
        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler], 'Default',
            False)
        self.analysis_context.register_component(new_match_path_detector,
                                                 description)
        t = time.time()

        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data,
                                     ParserMatch(self.match_element_fixed_dme),
                                     t, new_match_path_detector)

        match_path_filter = MatchPathFilter(
            [(self.match_element_decimal_integer_value_me.get_path(),
              new_match_path_detector)], new_match_path_detector)
        self.assertTrue(match_path_filter.receive_atom(log_atom_fixed_dme))
    def test7match_value_filter_receive_atom_target_value_and_handler_found(
            self):
        """A target_value and a handler, which can handle the matchObject is found."""
        description = "Test7AtomFilters"
        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler], 'Default',
            False)
        self.analysis_context.register_component(new_match_path_detector,
                                                 description)
        t = time.time()

        log_atom_fixed_dme = LogAtom(self.fixed_dme.fixed_data,
                                     ParserMatch(self.match_element_fixed_dme),
                                     t, new_match_path_detector)

        match_value_filter = MatchValueFilter(
            self.match_element_fixed_dme.get_path(),
            {self.fixed_dme.fixed_data: new_match_path_detector}, None)
        self.assertTrue(match_value_filter.receive_atom(log_atom_fixed_dme))
    def test2timestamp_lower_than_last_timestamp_exit_on_error(self):
        """This test case checks if the program exits, when the timestamp is lower than the last one and the exitOnError flag is set."""
        description = "Test2TimestampsUnsortedDetector"
        match_context_fixed_dme = MatchContext(self.pid)
        fixed_dme = FixedDataModelElement('s1', self.pid)
        match_element_fixed_dme = fixed_dme.get_match_element(
            "match", match_context_fixed_dme)
        new_match_path_detector = NewMatchPathDetector(
            self.aminer_config, [self.stream_printer_event_handler], 'Default',
            False)
        self.analysis_context.register_component(new_match_path_detector,
                                                 description)

        t = time()
        log_atom = LogAtom(fixed_dme.fixed_data,
                           ParserMatch(match_element_fixed_dme), t,
                           new_match_path_detector)
        timestamp_unsorted_detector = TimestampsUnsortedDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            True,
            output_log_line=False)
        self.analysis_context.register_component(timestamp_unsorted_detector,
                                                 description + "2")
        self.assertTrue(timestamp_unsorted_detector.receive_atom(log_atom))
        self.assertEqual(self.output_stream.getvalue(), '')

        log_atom.set_timestamp(t - 10000)
        with self.assertRaises(SystemExit) as cm:
            timestamp_unsorted_detector.receive_atom(log_atom)
        self.assertEqual(cm.exception.code, 1)
        self.assertEqual(
            self.output_stream.getvalue(), self.__expected_string %
            (datetime.fromtimestamp(t - 10000).strftime(
                self.datetime_format_string),
             datetime.fromtimestamp(t - 10000).strftime(
                 self.datetime_format_string),
             datetime.fromtimestamp(t).strftime(self.datetime_format_string),
             timestamp_unsorted_detector.__class__.__name__, description + "2",
             1, "b' pid='"))

# Example 28
def build_analysis_pipeline(analysis_context):
    """
    Define the function that creates the pipeline for parsing the log data.
    It also has to define an AtomizerFactory to instruct aminer how to process incoming data streams and create log atoms from them.
    """
    # Build the parsing model:

    service_children_disk_report = [
        FixedDataModelElement(
            'Space',
            b' Current Disk Data is: Filesystem     Type  Size  Used Avail Use%'
        ),
        DelimitedDataModelElement('Data', b'%'),
        AnyByteDataModelElement('Rest')
    ]

    service_children_login_details = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedWordlistDataModelElement('Status',
                                      [b' logged in', b' logged out']),
        OptionalMatchModelElement(
            'PastTime',
            SequenceModelElement('Time', [
                FixedDataModelElement('Blank', b' '),
                DecimalIntegerValueModelElement('Minutes'),
                FixedDataModelElement('Ago', b' minutes ago.')
            ]))
    ]

    service_children_cron_job = [
        DateTimeModelElement('DTM', b'%Y-%m-%d %H:%M:%S'),
        FixedDataModelElement('UNameSpace1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('UNameSpace2', b' '),
        DelimitedDataModelElement('User', b' '),
        FixedDataModelElement('Cron', b' cron['),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Details', b']: Job `cron.daily` started.')
    ]

    service_children_random_time = [
        FixedDataModelElement('Space', b'Random: '),
        DecimalIntegerValueModelElement('Random')
    ]

    service_children_sensors = [
        SequenceModelElement('CPUTemp', [
            FixedDataModelElement('FixedTemp', b'CPU Temp: '),
            DecimalIntegerValueModelElement('Temp'),
            FixedDataModelElement('Degrees', b'\xc2\xb0C')
        ]),
        FixedDataModelElement('Space1', b', '),
        SequenceModelElement('CPUWorkload', [
            FixedDataModelElement('FixedWorkload', b'CPUWorkload: '),
            DecimalIntegerValueModelElement('Workload'),
            FixedDataModelElement('Percent', b'%')
        ]),
        FixedDataModelElement('Space2', b', '),
        DateTimeModelElement('DTM', b'%Y-%m-%d %H:%M:%S')
    ]

    service_children_user_ip_address = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedDataModelElement('Action', b' changed IP address to '),
        IpAddressDataModelElement('IP')
    ]

    service_children_cron_job_announcement = [
        DateTimeModelElement('DTM', b'%Y-%m-%d %H:%M:%S'),
        FixedDataModelElement('Space', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', b' cron['),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Run', b']: Will run job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('StartTime', b'\' in 5 min.')
    ]

    service_children_cron_job_execution = [
        DateTimeModelElement('DTM', b'%Y-%m-%d %H:%M:%S'),
        FixedDataModelElement('Space1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', b' cron['),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Job', b']: Job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Started', b'\' started')
    ]

    parsing_model = FirstMatchModelElement('model', [
        SequenceModelElement('CronAnnouncement',
                             service_children_cron_job_announcement),
        SequenceModelElement('CronExecution',
                             service_children_cron_job_execution),
        SequenceModelElement('DailyCron', service_children_cron_job),
        SequenceModelElement('DiskReport', service_children_disk_report),
        SequenceModelElement('LoginDetails', service_children_login_details),
        DecimalIntegerValueModelElement('Random'),
        SequenceModelElement('RandomTime', service_children_random_time),
        SequenceModelElement('Sensors', service_children_sensors),
        SequenceModelElement('IPAddresses', service_children_user_ip_address)
    ])

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filters = AtomFilters.SubhandlerFilter(None)
    analysis_context.register_component(atom_filters,
                                        component_name="AtomFilter")

    from aminer.analysis.TimestampCorrectionFilters import SimpleMonotonicTimestampAdjust
    simple_monotonic_timestamp_adjust = SimpleMonotonicTimestampAdjust(
        [atom_filters])
    analysis_context.register_component(
        simple_monotonic_timestamp_adjust,
        component_name="SimpleMonotonicTimestampAdjust")

    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    stream_printer_event_handler = StreamPrinterEventHandler(
        analysis_context)  # skipcq: BAN-B108
    from aminer.events.Utils import VolatileLogarithmicBackoffEventHistory
    volatile_logarithmic_backoff_event_history = VolatileLogarithmicBackoffEventHistory(
        100)
    anomaly_event_handlers = [
        stream_printer_event_handler,
        volatile_logarithmic_backoff_event_history
    ]
    analysis_context.register_component(
        volatile_logarithmic_backoff_event_history,
        component_name="VolatileLogarithmicBackoffEventHistory")

    # Now define the AtomizerFactory using the model. A simple line based one is usually sufficient.
    from aminer.input.SimpleByteStreamLineAtomizerFactory import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(
        parsing_model, [simple_monotonic_timestamp_adjust],
        anomaly_event_handlers)

    # Just report all unparsed atoms to the event handlers.
    from aminer.analysis.UnparsedAtomHandlers import SimpleUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(
        anomaly_event_handlers)
    atom_filters.add_handler(simple_unparsed_atom_handler,
                             stop_when_handled_flag=True)
    analysis_context.register_component(simple_unparsed_atom_handler,
                                        component_name="UnparsedHandler")

    from aminer.analysis.TimestampsUnsortedDetector import TimestampsUnsortedDetector
    timestamps_unsorted_detector = TimestampsUnsortedDetector(
        analysis_context.aminer_config, anomaly_event_handlers)
    atom_filters.add_handler(timestamps_unsorted_detector)
    analysis_context.register_component(
        timestamps_unsorted_detector,
        component_name="TimestampsUnsortedDetector")

    from aminer.analysis import Rules
    from aminer.analysis.AllowlistViolationDetector import AllowlistViolationDetector
    allowlist_rules = [
        Rules.OrMatchRule([
            Rules.AndMatchRule([
                Rules.PathExistsMatchRule(
                    '/model/LoginDetails/PastTime/Time/Minutes'),
                Rules.NegationMatchRule(
                    Rules.ValueMatchRule('/model/LoginDetails/Username',
                                         b'root'))
            ]),
            Rules.AndMatchRule([
                Rules.NegationMatchRule(
                    Rules.PathExistsMatchRule(
                        '/model/LoginDetails/PastTime/Time/Minutes')),
                Rules.PathExistsMatchRule('/model/LoginDetails')
            ]),
            Rules.NegationMatchRule(
                Rules.PathExistsMatchRule('/model/LoginDetails'))
        ])
    ]

    # This rule list should trigger, when the line does not look like: User root (logged in, logged out)
    # or User 'username' (logged in, logged out) x minutes ago.
    allowlist_violation_detector = AllowlistViolationDetector(
        analysis_context.aminer_config, allowlist_rules,
        anomaly_event_handlers)
    analysis_context.register_component(allowlist_violation_detector,
                                        component_name="Allowlist")
    atom_filters.add_handler(allowlist_violation_detector)

    from aminer.analysis.ParserCount import ParserCount
    parser_count = ParserCount(analysis_context.aminer_config, None,
                               anomaly_event_handlers, 10)
    analysis_context.register_component(parser_count,
                                        component_name="ParserCount")
    atom_filters.add_handler(parser_count)

    from aminer.analysis.EventCorrelationDetector import EventCorrelationDetector
    ecd = EventCorrelationDetector(analysis_context.aminer_config,
                                   anomaly_event_handlers,
                                   check_rules_flag=True,
                                   hypothesis_max_delta_time=1.0,
                                   auto_include_flag=True)
    analysis_context.register_component(
        ecd, component_name="EventCorrelationDetector")
    atom_filters.add_handler(ecd)

    from aminer.analysis.NewMatchPathDetector import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_detector,
                                        component_name="NewMatchPath")
    atom_filters.add_handler(new_match_path_detector)

    def tuple_transformation_function(match_value_list):
        """Only allow output of the EnhancedNewMatchPathValueComboDetector after every 10000th element."""
        extra_data = enhanced_new_match_path_value_combo_detector.known_values_dict.get(
            tuple(match_value_list))
        if extra_data is not None:
            mod = 10000
            if (extra_data[2] + 1) % mod == 0:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = False
            else:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = True
        return match_value_list

    from aminer.analysis.EnhancedNewMatchPathValueComboDetector import EnhancedNewMatchPathValueComboDetector
    enhanced_new_match_path_value_combo_detector = EnhancedNewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/UName', '/model/DailyCron/JobNumber'],
        anomaly_event_handlers,
        auto_include_flag=False,
        tuple_transformation_function=tuple_transformation_function)
    analysis_context.register_component(
        enhanced_new_match_path_value_combo_detector,
        component_name="EnhancedNewValueCombo")
    atom_filters.add_handler(enhanced_new_match_path_value_combo_detector)

    from aminer.analysis.HistogramAnalysis import HistogramAnalysis, LinearNumericBinDefinition, ModuloTimeBinDefinition, \
        PathDependentHistogramAnalysis
    modulo_time_bin_definition = ModuloTimeBinDefinition(
        86400, 3600, 0, 1, 24, True)
    linear_numeric_bin_definition = LinearNumericBinDefinition(50, 5, 20, True)
    histogram_analysis = HistogramAnalysis(
        analysis_context.aminer_config,
        [('/model/RandomTime/Random', modulo_time_bin_definition),
         ('/model/Random', linear_numeric_bin_definition)], 10,
        anomaly_event_handlers)
    analysis_context.register_component(histogram_analysis,
                                        component_name="HistogramAnalysis")
    atom_filters.add_handler(histogram_analysis)

    path_dependent_histogram_analysis = PathDependentHistogramAnalysis(
        analysis_context.aminer_config, '/model/RandomTime',
        modulo_time_bin_definition, 10, anomaly_event_handlers)
    analysis_context.register_component(
        path_dependent_histogram_analysis,
        component_name="PathDependentHistogramAnalysis")
    atom_filters.add_handler(path_dependent_histogram_analysis)

    from aminer.analysis.MatchValueAverageChangeDetector import MatchValueAverageChangeDetector
    match_value_average_change_detector = MatchValueAverageChangeDetector(
        analysis_context.aminer_config, anomaly_event_handlers, None,
        ['/model/Random'], 100, 10)
    analysis_context.register_component(
        match_value_average_change_detector,
        component_name="MatchValueAverageChange")
    atom_filters.add_handler(match_value_average_change_detector)

    import sys
    from aminer.analysis.MatchValueStreamWriter import MatchValueStreamWriter
    match_value_stream_writer = MatchValueStreamWriter(sys.stdout, [
        '/model/Sensors/CPUTemp', '/model/Sensors/CPUWorkload',
        '/model/Sensors/DTM'
    ], b';', b'')
    analysis_context.register_component(
        match_value_stream_writer, component_name="MatchValueStreamWriter")
    atom_filters.add_handler(match_value_stream_writer)

    from aminer.analysis.NewMatchPathValueComboDetector import NewMatchPathValueComboDetector
    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/IPAddresses/Username', '/model/IPAddresses/IP'],
        anomaly_event_handlers,
        auto_include_flag=False)
    analysis_context.register_component(
        new_match_path_value_combo_detector,
        component_name="NewMatchPathValueCombo")
    atom_filters.add_handler(new_match_path_value_combo_detector)

    from aminer.analysis.NewMatchIdValueComboDetector import NewMatchIdValueComboDetector
    new_match_id_value_combo_detector = NewMatchIdValueComboDetector(
        analysis_context.aminer_config,
        ['/model/type/path/name', '/model/type/syscall/syscall'],
        anomaly_event_handlers,
        id_path_list=['/model/type/path/id', '/model/type/syscall/id'],
        min_allowed_time_diff=5,
        auto_include_flag=True,
        allow_missing_values_flag=True,
        output_log_line=True)
    analysis_context.register_component(
        new_match_id_value_combo_detector,
        component_name="NewMatchIdValueComboDetector")
    atom_filters.add_handler(new_match_id_value_combo_detector)

    from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector
    new_match_path_value_detector = NewMatchPathValueDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/JobNumber', '/model/IPAddresses/Username'],
        anomaly_event_handlers,
        auto_include_flag=False)
    analysis_context.register_component(new_match_path_value_detector,
                                        component_name="NewMatchPathValue")
    atom_filters.add_handler(new_match_path_value_detector)

    from aminer.analysis.MissingMatchPathValueDetector import MissingMatchPathValueDetector
    missing_match_path_value_detector = MissingMatchPathValueDetector(
        analysis_context.aminer_config, ['/model/DiskReport/Space'],
        anomaly_event_handlers,
        auto_include_flag=False,
        default_interval=2,
        realert_interval=5)
    analysis_context.register_component(missing_match_path_value_detector,
                                        component_name="MissingMatch")
    atom_filters.add_handler(missing_match_path_value_detector)

    from aminer.analysis.TimeCorrelationDetector import TimeCorrelationDetector
    time_correlation_detector = TimeCorrelationDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        2,
        min_rule_attributes=1,
        max_rule_attributes=5,
        record_count_before_event=70000,
        output_log_line=True)
    analysis_context.register_component(
        time_correlation_detector, component_name="TimeCorrelationDetector")
    atom_filters.add_handler(time_correlation_detector)

    from aminer.analysis.TimeCorrelationViolationDetector import TimeCorrelationViolationDetector, CorrelationRule, EventClassSelector
    cron_job_announcement = CorrelationRule(
        'CronJobAnnouncement',
        5,
        6,
        max_artefacts_a_for_single_b=1,
        artefact_match_parameters=[('/model/CronAnnouncement/JobNumber',
                                    '/model/CronExecution/JobNumber')])
    a_class_selector = EventClassSelector('Announcement',
                                          [cron_job_announcement], None)
    b_class_selector = EventClassSelector('Execution', None,
                                          [cron_job_announcement])
    rules = [
        Rules.PathExistsMatchRule('/model/CronAnnouncement/Run',
                                  a_class_selector),
        Rules.PathExistsMatchRule('/model/CronExecution/Job', b_class_selector)
    ]

    time_correlation_violation_detector = TimeCorrelationViolationDetector(
        analysis_context.aminer_config, rules, anomaly_event_handlers)
    analysis_context.register_component(
        time_correlation_violation_detector,
        component_name="TimeCorrelationViolationDetector")
    atom_filters.add_handler(time_correlation_violation_detector)

    from aminer.events.DefaultMailNotificationEventHandler import DefaultMailNotificationEventHandler
    if DefaultMailNotificationEventHandler.CONFIG_KEY_MAIL_TARGET_ADDRESS in analysis_context.aminer_config.config_properties:
        mail_notification_handler = DefaultMailNotificationEventHandler(
            analysis_context)
        analysis_context.register_component(mail_notification_handler,
                                            component_name="MailHandler")
        anomaly_event_handlers.append(mail_notification_handler)


def build_analysis_pipeline(analysis_context):
    """
    Define the function to create the pipeline for parsing the log data.
    It also has to define an AtomizerFactory to instruct aminer how to process incoming data streams to create log atoms from them.
    """
    date_format_string = b'%Y-%m-%d %H:%M:%S'
    cron = b' cron['

    # Build the parsing model:

    service_children_disk_report = [
        FixedDataModelElement('Space', b' Current Disk Data is: Filesystem     Type  Size  Used Avail Use%'),
        DelimitedDataModelElement('Data', b'%'), AnyByteDataModelElement('Rest')]

    service_children_login_details = [
        FixedDataModelElement('User/LoginDetails', b'User '), DelimitedDataModelElement('Username', b' '),
        FixedWordlistDataModelElement('Status', [b' logged in', b' logged out']),
        OptionalMatchModelElement('PastTime', SequenceModelElement('Time', [
            FixedDataModelElement('Blank', b' '), DecimalIntegerValueModelElement('Minutes'),
            FixedDataModelElement('Ago', b' minutes ago.')]))]

    service_children_cron_job = [
        DateTimeModelElement('DTM', date_format_string), FixedDataModelElement('UNameSpace1', b' '),
        DelimitedDataModelElement('UName', b' '), FixedDataModelElement('UNameSpace2', b' '), DelimitedDataModelElement('User', b' '),
        FixedDataModelElement('Cron', cron), DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Details', b']: Job `cron.daily` started.')]

    service_children_random_time = [FixedDataModelElement('Space', b'Random: '), DecimalIntegerValueModelElement('Random')]

    service_children_sensors = [SequenceModelElement('CPUTemp', [
        FixedDataModelElement('FixedTemp', b'CPU Temp: '), DecimalIntegerValueModelElement('Temp'),
        FixedDataModelElement('Degrees', b'\xc2\xb0C')]), FixedDataModelElement('Space1', b', '), SequenceModelElement('CPUWorkload', [
            FixedDataModelElement('FixedWorkload', b'CPU Workload: '), DecimalIntegerValueModelElement('Workload'),
            FixedDataModelElement('Percent', b'%')]), FixedDataModelElement('Space2', b', '),
        DateTimeModelElement('DTM', date_format_string)]

    service_children_user_ip_address = [
        FixedDataModelElement('User/UserIPAddress', b'User '), DelimitedDataModelElement('Username', b' '),
        FixedDataModelElement('Action', b' changed IP address to '), IpAddressDataModelElement('IP')]

    service_children_cron_job_announcement = [
        DateTimeModelElement('DTM', date_format_string), FixedDataModelElement('Space', b' '),
        DelimitedDataModelElement('UName', b' '), FixedDataModelElement('Cron', cron), DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Run', b']: Will run job `'),
        FixedWordlistDataModelElement('CronType', [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('StartTime', b'\' in 5 min.')]

    service_children_cron_job_execution = [
        DateTimeModelElement('DTM', date_format_string), FixedDataModelElement('Space1', b' '),
        DelimitedDataModelElement('UName', b' '), FixedDataModelElement('Cron', cron), DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Job', b']: Job `'),
        FixedWordlistDataModelElement('CronType', [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Started', b'\' started')]

    service_children_audit = [SequenceModelElement('path', [
        FixedDataModelElement('type', b'type=PATH '), FixedDataModelElement('msg_audit', b'msg=audit('),
        DelimitedDataModelElement('msg', b':'), FixedDataModelElement('placeholder', b':'), DecimalIntegerValueModelElement('id'),
        FixedDataModelElement('item_string', b'): item='), DecimalIntegerValueModelElement('item'),
        FixedDataModelElement('name_string', b' name="'), DelimitedDataModelElement('name', b'"'),
        FixedDataModelElement('inode_string', b'" inode='), DecimalIntegerValueModelElement('inode'),
        FixedDataModelElement('dev_string', b' dev='), DelimitedDataModelElement('dev', b' '),
        FixedDataModelElement('mode_string', b' mode='),
        DecimalIntegerValueModelElement('mode', value_pad_type=DecimalIntegerValueModelElement.PAD_TYPE_ZERO),
        FixedDataModelElement('ouid_string', b' ouid='), DecimalIntegerValueModelElement('ouid'),
        FixedDataModelElement('ogid_string', b' ogid='), DecimalIntegerValueModelElement('ogid'),
        FixedDataModelElement('rdev_string', b' rdev='), DelimitedDataModelElement('rdev', b' '),
        FixedDataModelElement('nametype_string', b' nametype='), FixedWordlistDataModelElement('nametype', [b'NORMAL', b'ERROR'])]),
        SequenceModelElement('syscall', [
            FixedDataModelElement('type', b'type=SYSCALL '), FixedDataModelElement('msg_audit', b'msg=audit('),
            DelimitedDataModelElement('msg', b':'), FixedDataModelElement('placeholder', b':'), DecimalIntegerValueModelElement('id'),
            FixedDataModelElement('arch_string', b'): arch='), DelimitedDataModelElement('arch', b' '),
            FixedDataModelElement('syscall_string', b' syscall='), DecimalIntegerValueModelElement('syscall'),
            FixedDataModelElement('success_string', b' success='), FixedWordlistDataModelElement('success', [b'yes', b'no']),
            FixedDataModelElement('exit_string', b' exit='), DecimalIntegerValueModelElement('exit'),
            AnyByteDataModelElement('remaining_data')])]

    service_children_parsing_model_element = [
        DateTimeModelElement('DateTimeModelElement', b'Current DateTime: %d.%m.%Y %H:%M:%S'),
        DecimalFloatValueModelElement('DecimalFloatValueModelElement', value_sign_type='optional'),
        DecimalIntegerValueModelElement('DecimalIntegerValueModelElement', value_sign_type='optional', value_pad_type='blank'),
        SequenceModelElement('se', [
            DelimitedDataModelElement('DelimitedDataModelElement', b';'), FixedDataModelElement('FixedDataModelElement', b';')])]

    # ElementValueBranchModelElement
    fixed_data_me1 = FixedDataModelElement("fixed1", b'match ')
    fixed_data_me2 = FixedDataModelElement("fixed2", b'fixed String')
    fixed_wordlist_data_model_element = FixedWordlistDataModelElement("wordlist", [b'data: ', b'string: '])
    decimal_integer_value_model_element = DecimalIntegerValueModelElement("decimal")

    service_children_parsing_model_element.append(
        ElementValueBranchModelElement('ElementValueBranchModelElement', FirstMatchModelElement("first", [
            SequenceModelElement("seq1", [fixed_data_me1, fixed_wordlist_data_model_element]),
            SequenceModelElement("seq2", [fixed_data_me1, fixed_wordlist_data_model_element, fixed_data_me2])]), "wordlist",
                                 {0: decimal_integer_value_model_element, 1: fixed_data_me2}))
    service_children_parsing_model_element.append(HexStringModelElement('HexStringModelElement'))
    service_children_parsing_model_element.append(SequenceModelElement('se2', [
        FixedDataModelElement('FixedDataModelElement', b'Gateway IP-Address: '), IpAddressDataModelElement('IpAddressDataModelElement')]))
    import locale
    loc = locale.getlocale()
    if loc == (None, None):
        loc = ('en_US', 'utf8')
    service_children_parsing_model_element.append(
        MultiLocaleDateTimeModelElement('MultiLocaleDateTimeModelElement', [(b'%b %d %Y', None, '%s.%s' % loc)]))
    service_children_parsing_model_element.append(
        RepeatedElementDataModelElement('RepeatedElementDataModelElement', SequenceModelElement('SequenceModelElement', [
            FixedDataModelElement('FixedDataModelElement', b'[drawn number]: '),
            DecimalIntegerValueModelElement('DecimalIntegerValueModelElement')]), 1))
    service_children_parsing_model_element.append(VariableByteDataModelElement('VariableByteDataModelElement', b'-@#'))
    service_children_parsing_model_element.append(SequenceModelElement('se', [
        WhiteSpaceLimitedDataModelElement('WhiteSpaceLimitedDataModelElement'), FixedDataModelElement('fixed', b' ')]))

    # The Base64StringModelElement must be just before the AnyByteDataModelElement to avoid unexpected matches.
    service_children_parsing_model_element.append(Base64StringModelElement('Base64StringModelElement'))

    # The OptionalMatchModelElement must be paired with a FirstMatchModelElement because it accepts all data and thus no data gets
    # to the AnyByteDataModelElement. The AnyByteDataModelElement must be last, because all bytes are accepted.
    service_children_parsing_model_element.append(
        OptionalMatchModelElement('/', FirstMatchModelElement('FirstMatchModelElement//optional', [
            FixedDataModelElement('FixedDataModelElement', b'The-searched-element-was-found!'), SequenceModelElement('se', [
                FixedDataModelElement('FixedDME', b'Any:'), AnyByteDataModelElement('AnyByteDataModelElement')])])))

    alphabet = b'ghijkl'
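    # One single-byte FixedDataModelElement per character of the alphabet; the resulting
    # /model/ECD/<char> paths are referenced by the correlation and sequence detector rules below.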
    service_children_ecd = []
    for char in alphabet:
        char = bytes([char])
        service_children_ecd.append(FixedDataModelElement(char.decode(), char))

    parsing_model = FirstMatchModelElement('model', [
        SequenceModelElement('CronAnnouncement', service_children_cron_job_announcement),
        SequenceModelElement('CronExecution', service_children_cron_job_execution),
        SequenceModelElement('DailyCron', service_children_cron_job), SequenceModelElement('DiskReport', service_children_disk_report),
        SequenceModelElement('LoginDetails', service_children_login_details), DecimalIntegerValueModelElement('Random'),
        SequenceModelElement('RandomTime', service_children_random_time), SequenceModelElement('Sensors', service_children_sensors),
        SequenceModelElement('IPAddresses', service_children_user_ip_address), FirstMatchModelElement('type', service_children_audit),
        FirstMatchModelElement('ECD', service_children_ecd), FirstMatchModelElement('ParsingME', service_children_parsing_model_element)])

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filter = AtomFilters.SubhandlerFilter(None)

    from aminer.analysis.TimestampCorrectionFilters import SimpleMonotonicTimestampAdjust
    simple_monotonic_timestamp_adjust = SimpleMonotonicTimestampAdjust([atom_filter])
    analysis_context.register_component(simple_monotonic_timestamp_adjust, component_name="SimpleMonotonicTimestampAdjust")

    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    from aminer.events.JsonConverterHandler import JsonConverterHandler
    stream_printer_event_handler = StreamPrinterEventHandler(analysis_context)
    json_converter_handler = JsonConverterHandler([stream_printer_event_handler], analysis_context)
    anomaly_event_handlers = [json_converter_handler]

    # Now define the AtomizerFactory using the model. A simple line based one is usually sufficient.
    from aminer.input.SimpleByteStreamLineAtomizerFactory import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(parsing_model, [simple_monotonic_timestamp_adjust],
                                                                            anomaly_event_handlers)

    # Just report all unparsed atoms to the event handlers.
    from aminer.analysis.UnparsedAtomHandlers import SimpleUnparsedAtomHandler, VerboseUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(anomaly_event_handlers)
    atom_filter.add_handler(simple_unparsed_atom_handler, stop_when_handled_flag=False)
    analysis_context.register_component(simple_unparsed_atom_handler, component_name="SimpleUnparsedHandler")

    verbose_unparsed_atom_handler = VerboseUnparsedAtomHandler(anomaly_event_handlers, parsing_model)
    atom_filter.add_handler(verbose_unparsed_atom_handler, stop_when_handled_flag=True)
    analysis_context.register_component(verbose_unparsed_atom_handler, component_name="VerboseUnparsedHandler")

    from aminer.analysis.TimestampsUnsortedDetector import TimestampsUnsortedDetector
    timestamps_unsorted_detector = TimestampsUnsortedDetector(analysis_context.aminer_config, anomaly_event_handlers)
    atom_filter.add_handler(timestamps_unsorted_detector)
    analysis_context.register_component(timestamps_unsorted_detector, component_name="TimestampsUnsortedDetector")

    from aminer.analysis import Rules
    from aminer.analysis.AllowlistViolationDetector import AllowlistViolationDetector
    allowlist_rules = [
        Rules.OrMatchRule([
            Rules.AndMatchRule([
                Rules.PathExistsMatchRule('/model/LoginDetails/PastTime/Time/Minutes'),
                Rules.NegationMatchRule(Rules.ValueMatchRule('/model/LoginDetails/Username', b'root')),
                Rules.DebugMatchRule(debug_match_result=True)]),
            Rules.AndMatchRule([
                Rules.NegationMatchRule(Rules.PathExistsMatchRule('/model/LoginDetails/PastTime/Time/Minutes')),
                Rules.PathExistsMatchRule('/model/LoginDetails'),
                Rules.DebugMatchRule(debug_match_result=True)]),
            Rules.NegationMatchRule(Rules.PathExistsMatchRule('/model/LoginDetails'))])]

    # This rule list should trigger when the line does not look like: User root (logged in, logged out)
    # or User 'username' (logged in, logged out) x minutes ago.
    allowlist_violation_detector = AllowlistViolationDetector(analysis_context.aminer_config, allowlist_rules, anomaly_event_handlers,
                                                              output_log_line=True)
    analysis_context.register_component(allowlist_violation_detector, component_name="Allowlist")
    atom_filter.add_handler(allowlist_violation_detector)

    from aminer.analysis.ParserCount import ParserCount
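    # ParserCount reports how often each parser path matched; passing None for the path list
    # counts all paths, and here a report should be emitted roughly every 10 seconds.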
    parser_count = ParserCount(analysis_context.aminer_config, None, anomaly_event_handlers, 10)
    analysis_context.register_component(parser_count, component_name="ParserCount")
    atom_filter.add_handler(parser_count)

    from aminer.analysis.EventTypeDetector import EventTypeDetector
    etd = EventTypeDetector(analysis_context.aminer_config, anomaly_event_handlers)
    analysis_context.register_component(etd, component_name="EventTypeDetector")
    atom_filter.add_handler(etd)

    from aminer.analysis.VariableTypeDetector import VariableTypeDetector
    vtd = VariableTypeDetector(analysis_context.aminer_config, anomaly_event_handlers, etd, silence_output_except_indicator=False,
                               output_log_line=False, ignore_list=["/model/RandomTime"])
    analysis_context.register_component(vtd, component_name="VariableTypeDetector")
    atom_filter.add_handler(vtd)

    from aminer.analysis.VariableCorrelationDetector import VariableCorrelationDetector
    vcd = VariableCorrelationDetector(analysis_context.aminer_config, anomaly_event_handlers, etd, disc_div_thres=0.5,
                                      ignore_list=["/model/RandomTime"])
    analysis_context.register_component(vcd, component_name="VariableCorrelationDetector")
    atom_filter.add_handler(vcd)

    from aminer.analysis.EventCorrelationDetector import EventCorrelationDetector
    ecd = EventCorrelationDetector(analysis_context.aminer_config, anomaly_event_handlers, check_rules_flag=True,
                                   hypothesis_max_delta_time=1.0)
    analysis_context.register_component(ecd, component_name="EventCorrelationDetector")
    atom_filter.add_handler(ecd)

    from aminer.analysis.EventFrequencyDetector import EventFrequencyDetector
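    # The EventFrequencyDetector compares the number of events seen per time window;
    # window_size is presumably given in seconds, so 0.1 keeps the demo window very short.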
    efd = EventFrequencyDetector(analysis_context.aminer_config, anomaly_event_handlers, window_size=0.1)
    analysis_context.register_component(efd, component_name="EventFrequencyDetector")
    atom_filter.add_handler(efd)

    from aminer.analysis.EventSequenceDetector import EventSequenceDetector
    esd = EventSequenceDetector(analysis_context.aminer_config, anomaly_event_handlers, ['/model/ParsingME'], ignore_list=[
        '/model/ECD/g', '/model/ECD/h', '/model/ECD/i', '/model/ECD/j', '/model/ECD/k', '/model/ECD/l', '/model/Random',
        '/model/RandomTime', '/model/DailyCron'])
    analysis_context.register_component(esd, component_name="EventSequenceDetector")
    atom_filter.add_handler(esd)

    from aminer.analysis.MatchFilter import MatchFilter
    match_filter = MatchFilter(analysis_context.aminer_config, ['/model/Random'], anomaly_event_handlers, target_value_list=[
        1, 10, 100], output_log_line=True)
    analysis_context.register_component(match_filter, component_name="MatchFilter")
    atom_filter.add_handler(match_filter)

    from aminer.analysis.NewMatchPathDetector import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(analysis_context.aminer_config, anomaly_event_handlers, auto_include_flag=True,
                                                   output_log_line=True)
    analysis_context.register_component(new_match_path_detector, component_name="NewMatchPath")
    atom_filter.add_handler(new_match_path_detector)

    def tuple_transformation_function(match_value_list):
        """Only allow output of the EnhancedNewMatchPathValueComboDetector after every 10th element."""
        extra_data = enhanced_new_match_path_value_combo_detector.known_values_dict.get(tuple(match_value_list))
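        # extra_data is the tracking list kept per value tuple; its third entry is the
        # occurrence counter evaluated in the modulo check below.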
        if extra_data is not None:
            mod = 10
            if (extra_data[2] + 1) % mod == 0:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = False
            else:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = True
        return match_value_list

    from aminer.analysis.EnhancedNewMatchPathValueComboDetector import EnhancedNewMatchPathValueComboDetector
    enhanced_new_match_path_value_combo_detector = EnhancedNewMatchPathValueComboDetector(analysis_context.aminer_config, [
        '/model/DailyCron/UName', '/model/DailyCron/JobNumber'], anomaly_event_handlers, auto_include_flag=True,
        tuple_transformation_function=tuple_transformation_function, output_log_line=True)
    analysis_context.register_component(enhanced_new_match_path_value_combo_detector, component_name="EnhancedNewValueCombo")
    atom_filter.add_handler(enhanced_new_match_path_value_combo_detector)

    import re
    ip_match_action = Rules.EventGenerationMatchAction(
        "Analysis.Rules.IPv4InRFC1918MatchRule", "Private IP address occurred!", anomaly_event_handlers)

    vdmt = Rules.ValueDependentModuloTimeMatchRule(None, 3, ["/model/ECD/j", "/model/ECD/k", "/model/ECD/l"], {b"e": [0, 2.95]}, [0, 3])
    mt = Rules.ModuloTimeMatchRule(None, 3, 0, 3, None)
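    # mt accepts every atom, since any timestamp modulo 3 lies within the limits [0, 3];
    # vdmt applies per-value limits (here [0, 2.95] for the value b"e", [0, 3] as the default).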
    time_allowlist_rules = [
        Rules.AndMatchRule([
            Rules.ParallelMatchRule([
                Rules.ValueDependentDelegatedMatchRule([
                    '/model/ECD/g', '/model/ECD/h', '/model/ECD/i', '/model/ECD/j', '/model/ECD/k', '/model/ECD/l'], {
                        (b"a",): mt, (b"b",): mt, (b"c",): mt, (b"d",): vdmt, (b"e",): vdmt, (b"f",): vdmt, None: mt}, mt),
                Rules.IPv4InRFC1918MatchRule("/model/ParsingME/se2/IpAddressDataModelElement", ip_match_action),
                Rules.DebugHistoryMatchRule(debug_match_result=True)
            ]),
            # IP addresses 8.8.8.8, 8.8.4.4 and 10.0.0.0 - 10.255.255.255 are not allowed
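            # The integers are the 32-bit representations of those addresses:
            # 134744072 = 8.8.8.8, 134743044 = 8.8.4.4, 167772160 = 10.0.0.0, 184549375 = 10.255.255.255.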
            Rules.NegationMatchRule(Rules.ValueListMatchRule("/model/ParsingME/se2/IpAddressDataModelElement", [134744072, 134743044])),
            Rules.NegationMatchRule(Rules.ValueRangeMatchRule("/model/ParsingME/se2/IpAddressDataModelElement", 167772160, 184549375)),
            Rules.NegationMatchRule(Rules.StringRegexMatchRule("/model/type/syscall/success", re.compile(b"^no$")))
        ])
    ]
    time_allowlist_violation_detector = AllowlistViolationDetector(
        analysis_context.aminer_config, time_allowlist_rules, anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(time_allowlist_violation_detector, component_name="TimeAllowlist")
    atom_filter.add_handler(time_allowlist_violation_detector)

    from aminer.analysis.HistogramAnalysis import HistogramAnalysis, LinearNumericBinDefinition, ModuloTimeBinDefinition, \
        PathDependentHistogramAnalysis
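    # The modulo bin definition folds timestamps into an 86400-second day split into 24 one-hour
    # bins; the linear definition uses 20 bins of width 5 starting at 50. The trailing True
    # requests additional outlier bins for values outside that range.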
    modulo_time_bin_definition = ModuloTimeBinDefinition(86400, 3600, 0, 1, 24, True)
    linear_numeric_bin_definition = LinearNumericBinDefinition(50, 5, 20, True)
    histogram_analysis = HistogramAnalysis(analysis_context.aminer_config, [
        ('/model/RandomTime/Random', modulo_time_bin_definition), ('/model/Random', linear_numeric_bin_definition)], 10,
        anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(histogram_analysis, component_name="HistogramAnalysis")
    atom_filter.add_handler(histogram_analysis)

    path_dependent_histogram_analysis = PathDependentHistogramAnalysis(
        analysis_context.aminer_config, '/model/RandomTime', modulo_time_bin_definition, 10, anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(path_dependent_histogram_analysis, component_name="PathDependentHistogramAnalysis")
    atom_filter.add_handler(path_dependent_histogram_analysis)

    from aminer.analysis.MatchValueAverageChangeDetector import MatchValueAverageChangeDetector
    match_value_average_change_detector = MatchValueAverageChangeDetector(analysis_context.aminer_config, anomaly_event_handlers, None, [
        '/model/Random'], 100, 10, output_log_line=True)
    analysis_context.register_component(match_value_average_change_detector, component_name="MatchValueAverageChange")
    atom_filter.add_handler(match_value_average_change_detector)

    import sys
    from aminer.analysis.MatchValueStreamWriter import MatchValueStreamWriter
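    # Write the values of the listed sensor paths to stdout as ';'-separated records;
    # the empty bytes object serves as the placeholder for missing values.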
    match_value_stream_writer = MatchValueStreamWriter(
        sys.stdout, ['/model/Sensors/CPUTemp', '/model/Sensors/CPUWorkload', '/model/Sensors/DTM'], b';', b'')
    analysis_context.register_component(match_value_stream_writer, component_name="MatchValueStreamWriter")
    atom_filter.add_handler(match_value_stream_writer)

    from aminer.analysis.NewMatchPathValueComboDetector import NewMatchPathValueComboDetector
    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        analysis_context.aminer_config, ['/model/IPAddresses/Username', '/model/IPAddresses/IP'],
        anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(new_match_path_value_combo_detector, component_name="NewMatchPathValueCombo")
    atom_filter.add_handler(new_match_path_value_combo_detector)

    from aminer.analysis.NewMatchIdValueComboDetector import NewMatchIdValueComboDetector
    new_match_id_value_combo_detector = NewMatchIdValueComboDetector(analysis_context.aminer_config, [
        '/model/type/path/name', '/model/type/syscall/syscall'], anomaly_event_handlers, id_path_list=[
        '/model/type/path/id', '/model/type/syscall/id'], min_allowed_time_diff=5, auto_include_flag=True, allow_missing_values_flag=True,
        output_log_line=True)
    analysis_context.register_component(new_match_id_value_combo_detector, component_name="NewMatchIdValueComboDetector")
    atom_filter.add_handler(new_match_id_value_combo_detector)

    from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector
    new_match_path_value_detector = NewMatchPathValueDetector(analysis_context.aminer_config, [
        '/model/DailyCron/JobNumber', '/model/IPAddresses/Username'], anomaly_event_handlers, auto_include_flag=True, output_log_line=True)
    analysis_context.register_component(new_match_path_value_detector, component_name="NewMatchPathValue")
    atom_filter.add_handler(new_match_path_value_detector)

    from aminer.analysis.MissingMatchPathValueDetector import MissingMatchPathValueDetector
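    # Raise an anomaly when no atom containing /model/DiskReport/Space arrives within
    # default_interval (2 s) and repeat the alert every realert_interval (5 s).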
    missing_match_path_value_detector = MissingMatchPathValueDetector(
        analysis_context.aminer_config, ['/model/DiskReport/Space'], anomaly_event_handlers, auto_include_flag=True, default_interval=2,
        realert_interval=5, output_log_line=True)
    analysis_context.register_component(missing_match_path_value_detector, component_name="MissingMatch")
    atom_filter.add_handler(missing_match_path_value_detector)

    from aminer.analysis.TimeCorrelationDetector import TimeCorrelationDetector
    time_correlation_detector = TimeCorrelationDetector(
        analysis_context.aminer_config, anomaly_event_handlers, 2, min_rule_attributes=1, max_rule_attributes=5,
        record_count_before_event=10000, output_log_line=True)
    analysis_context.register_component(time_correlation_detector, component_name="TimeCorrelationDetector")
    atom_filter.add_handler(time_correlation_detector)

    from aminer.analysis.TimeCorrelationViolationDetector import TimeCorrelationViolationDetector, CorrelationRule, EventClassSelector
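    # Correlate cron job announcements with their executions: a matching execution
    # (same JobNumber) has to follow the announcement within 5 to 6 seconds.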
    cron_job_announcement = CorrelationRule('CronJobAnnouncement', 5, 6, max_artefacts_a_for_single_b=1, artefact_match_parameters=[
        ('/model/CronAnnouncement/JobNumber', '/model/CronExecution/JobNumber')])
    a_class_selector = EventClassSelector('Announcement', [cron_job_announcement], None)
    b_class_selector = EventClassSelector('Execution', None, [cron_job_announcement])
    rules = [Rules.PathExistsMatchRule('/model/CronAnnouncement/Run', a_class_selector),
             Rules.PathExistsMatchRule('/model/CronExecution/Job', b_class_selector)]

    time_correlation_violation_detector = TimeCorrelationViolationDetector(analysis_context.aminer_config, rules, anomaly_event_handlers,
                                                                           output_log_line=True)
    analysis_context.register_component(time_correlation_violation_detector, component_name="TimeCorrelationViolationDetector")
    atom_filter.add_handler(time_correlation_violation_detector)


def build_analysis_pipeline(analysis_context):
    """
    Define the function to create the pipeline for parsing the log data.
    It also has to define an AtomizerFactory to instruct aminer how to process incoming data streams to create log atoms from them.
    """
    date_format_string = b'%Y-%m-%d %H:%M:%S'
    cron = b' cron['

    # Build the parsing model:

    service_children_disk_report = [
        FixedDataModelElement(
            'Space',
            b' Current Disk Data is: Filesystem     Type  Size  Used Avail Use%'
        ),
        DelimitedDataModelElement('Data', b'%'),
        AnyByteDataModelElement('Rest')
    ]

    service_children_login_details = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedWordlistDataModelElement('Status',
                                      [b' logged in', b' logged out']),
        OptionalMatchModelElement(
            'PastTime',
            SequenceModelElement('Time', [
                FixedDataModelElement('Blank', b' '),
                DecimalIntegerValueModelElement('Minutes'),
                FixedDataModelElement('Ago', b' minutes ago.')
            ]))
    ]

    service_children_cron_job = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('UNameSpace1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('UNameSpace2', b' '),
        DelimitedDataModelElement('User', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Details', b']: Job `cron.daily` started.')
    ]

    service_children_random_time = [
        FixedDataModelElement('Space', b'Random: '),
        DecimalIntegerValueModelElement('Random')
    ]

    service_children_sensors = [
        SequenceModelElement('CPUTemp', [
            FixedDataModelElement('FixedTemp', b'CPU Temp: '),
            DecimalIntegerValueModelElement('Temp'),
            FixedDataModelElement('Degrees', b'\xc2\xb0C')
        ]),
        FixedDataModelElement('Space1', b', '),
        SequenceModelElement('CPUWorkload', [
            FixedDataModelElement('FixedWorkload', b'CPU Workload: '),
            DecimalIntegerValueModelElement('Workload'),
            FixedDataModelElement('Percent', b'%')
        ]),
        FixedDataModelElement('Space2', b', '),
        DateTimeModelElement('DTM', date_format_string)
    ]

    service_children_user_ip_address = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedDataModelElement('Action', b' changed IP address to '),
        IpAddressDataModelElement('IP')
    ]

    service_children_cron_job_announcement = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('Space', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Run', b']: Will run job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('StartTime', b'\' in 5 min.')
    ]

    service_children_cron_job_execution = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('Space1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Job', b']: Job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Started', b'\' started')
    ]

    service_children_parsing_model_element = [
        DateTimeModelElement('DateTimeModelElement',
                             b'Current DateTime: %d.%m.%Y %H:%M:%S'),
        DecimalFloatValueModelElement('DecimalFloatValueModelElement',
                                      value_sign_type='optional'),
        DecimalIntegerValueModelElement('DecimalIntegerValueModelElement',
                                        value_sign_type='optional',
                                        value_pad_type='blank'),
        SequenceModelElement('se', [
            DelimitedDataModelElement('DelimitedDataModelElement', b';'),
            FixedDataModelElement('FixedDataModelElement', b';')
        ])
    ]

    # ElementValueBranchModelElement
    fixed_data_me1 = FixedDataModelElement("fixed1", b'match ')
    fixed_data_me2 = FixedDataModelElement("fixed2", b'fixed String')
    fixed_wordlist_data_model_element = FixedWordlistDataModelElement(
        "wordlist", [b'data: ', b'string: '])
    decimal_integer_value_model_element = DecimalIntegerValueModelElement(
        "decimal")

    service_children_parsing_model_element.append(
        ElementValueBranchModelElement(
            'ElementValueBranchModelElement',
            FirstMatchModelElement("first", [
                SequenceModelElement(
                    "seq1",
                    [fixed_data_me1, fixed_wordlist_data_model_element]),
                SequenceModelElement("seq2", [
                    fixed_data_me1, fixed_wordlist_data_model_element,
                    fixed_data_me2
                ])
            ]), "wordlist", {
                0: decimal_integer_value_model_element,
                1: fixed_data_me2
            }))
    service_children_parsing_model_element.append(
        HexStringModelElement('HexStringModelElement'))
    service_children_parsing_model_element.append(
        SequenceModelElement('se2', [
            FixedDataModelElement('FixedDataModelElement',
                                  b'Gateway IP-Address: '),
            IpAddressDataModelElement('IpAddressDataModelElement')
        ]))
    service_children_parsing_model_element.append(
        MultiLocaleDateTimeModelElement('MultiLocaleDateTimeModelElement',
                                        [(b'%b %d %Y', None, 'de_AT.utf8')]))
    service_children_parsing_model_element.append(
        RepeatedElementDataModelElement(
            'RepeatedElementDataModelElement',
            SequenceModelElement('SequenceModelElement', [
                FixedDataModelElement('FixedDataModelElement',
                                      b'drawn number: '),
                DecimalIntegerValueModelElement(
                    'DecimalIntegerValueModelElement')
            ]), 1))
    service_children_parsing_model_element.append(
        VariableByteDataModelElement('VariableByteDataModelElement', b'-@#'))
    service_children_parsing_model_element.append(
        SequenceModelElement('se', [
            WhiteSpaceLimitedDataModelElement(
                'WhiteSpaceLimitedDataModelElement'),
            FixedDataModelElement('fixed', b' ')
        ]))

    # The Base64StringModelElement must be just before the AnyByteDataModelElement to avoid unexpected matches.
    service_children_parsing_model_element.append(
        Base64StringModelElement('Base64StringModelElement'))

    # The OptionalMatchModelElement must be paired with a FirstMatchModelElement because it accepts all data and thus no data gets
    # to the AnyByteDataModelElement. The AnyByteDataModelElement must be last, because all bytes are accepted.
    service_children_parsing_model_element.append(
        OptionalMatchModelElement(
            'OptionalMatchModelElement',
            FirstMatchModelElement('FirstMatchModelElement', [
                FixedDataModelElement('FixedDataModelElement',
                                      b'The-searched-element-was-found!'),
                AnyByteDataModelElement('AnyByteDataModelElement')
            ])))

    parsing_model = FirstMatchModelElement('model', [
        SequenceModelElement('CronAnnouncement',
                             service_children_cron_job_announcement),
        SequenceModelElement('CronExecution',
                             service_children_cron_job_execution),
        SequenceModelElement('DailyCron', service_children_cron_job),
        SequenceModelElement('DiskReport', service_children_disk_report),
        SequenceModelElement('LoginDetails', service_children_login_details),
        DecimalIntegerValueModelElement('Random'),
        SequenceModelElement('RandomTime', service_children_random_time),
        SequenceModelElement('Sensors', service_children_sensors),
        SequenceModelElement('IPAddresses', service_children_user_ip_address),
        FirstMatchModelElement('ParsingME',
                               service_children_parsing_model_element)
    ])

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers
    # later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filter = AtomFilters.SubhandlerFilter(None)

    from aminer.analysis.TimestampCorrectionFilters import SimpleMonotonicTimestampAdjust
    simple_monotonic_timestamp_adjust = SimpleMonotonicTimestampAdjust(
        [atom_filter])
    analysis_context.register_component(
        simple_monotonic_timestamp_adjust,
        component_name="SimpleMonotonicTimestampAdjust")

    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    stream_printer_event_handler = StreamPrinterEventHandler(analysis_context)
    from aminer.events.SyslogWriterEventHandler import SyslogWriterEventHandler
    syslog_event_handler = SyslogWriterEventHandler(analysis_context)
    anomaly_event_handlers = [
        stream_printer_event_handler, syslog_event_handler
    ]
    from aminer.events.DefaultMailNotificationEventHandler import DefaultMailNotificationEventHandler
    # Only create and register the mail handler when a mail target address is configured;
    # otherwise mail_notification_handler would be undefined at this point.
    if DefaultMailNotificationEventHandler.CONFIG_KEY_MAIL_TARGET_ADDRESS in analysis_context.aminer_config.config_properties:
        mail_notification_handler = DefaultMailNotificationEventHandler(
            analysis_context)
        analysis_context.register_component(mail_notification_handler,
                                            component_name="MailHandler")
        anomaly_event_handlers.append(mail_notification_handler)

    # Now define the AtomizerFactory using the model. A simple line based one is usually sufficient.
    from aminer.input.SimpleByteStreamLineAtomizerFactory import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(
        parsing_model, [simple_monotonic_timestamp_adjust],
        anomaly_event_handlers)

    # Just report all unparsed atoms to the event handlers.
    from aminer.analysis.UnparsedAtomHandlers import SimpleUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(
        anomaly_event_handlers)
    atom_filter.add_handler(simple_unparsed_atom_handler,
                            stop_when_handled_flag=True)
    analysis_context.register_component(simple_unparsed_atom_handler,
                                        component_name="UnparsedHandler")

    from aminer.analysis.TimestampsUnsortedDetector import TimestampsUnsortedDetector
    timestamps_unsorted_detector = TimestampsUnsortedDetector(
        analysis_context.aminer_config, anomaly_event_handlers)
    atom_filter.add_handler(timestamps_unsorted_detector)
    analysis_context.register_component(
        timestamps_unsorted_detector,
        component_name="TimestampsUnsortedDetector")

    from aminer.analysis import Rules
    from aminer.analysis.AllowlistViolationDetector import AllowlistViolationDetector
    allowlist_rules = [
        Rules.OrMatchRule([
            Rules.AndMatchRule([
                Rules.PathExistsMatchRule(
                    '/model/LoginDetails/PastTime/Time/Minutes'),
                Rules.NegationMatchRule(
                    Rules.ValueMatchRule('/model/LoginDetails/Username',
                                         b'root'))
            ]),
            Rules.AndMatchRule([
                Rules.NegationMatchRule(
                    Rules.PathExistsMatchRule(
                        '/model/LoginDetails/PastTime/Time/Minutes')),
                Rules.PathExistsMatchRule('/model/LoginDetails')
            ]),
            Rules.NegationMatchRule(
                Rules.PathExistsMatchRule('/model/LoginDetails'))
        ])
    ]

    # This rule list should trigger when the line does not look like: User root (logged in, logged out)
    # or User 'username' (logged in, logged out) x minutes ago.
    allowlist_violation_detector = AllowlistViolationDetector(
        analysis_context.aminer_config, allowlist_rules,
        anomaly_event_handlers)
    analysis_context.register_component(allowlist_violation_detector,
                                        component_name="Allowlist")
    atom_filter.add_handler(allowlist_violation_detector)

    from aminer.analysis.NewMatchPathDetector import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_detector,
                                        component_name="NewMatchPath")
    atom_filter.add_handler(new_match_path_detector)

    def tuple_transformation_function(match_value_list):
        """Only allow output of the EnhancedNewMatchPathValueComboDetector after every 10000th element."""
        extra_data = enhanced_new_match_path_value_combo_detector.known_values_dict.get(
            tuple(match_value_list))
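        # The third entry of extra_data is the occurrence counter evaluated in the modulo check below.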
        if extra_data is not None:
            mod = 10000
            if (extra_data[2] + 1) % mod == 0:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = False
            else:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = True
        return match_value_list

    from aminer.analysis.EnhancedNewMatchPathValueComboDetector import EnhancedNewMatchPathValueComboDetector
    enhanced_new_match_path_value_combo_detector = EnhancedNewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/UName', '/model/DailyCron/JobNumber'],
        anomaly_event_handlers,
        auto_include_flag=True,
        tuple_transformation_function=tuple_transformation_function)
    analysis_context.register_component(
        enhanced_new_match_path_value_combo_detector,
        component_name="EnhancedNewValueCombo")
    atom_filter.add_handler(enhanced_new_match_path_value_combo_detector)

    from aminer.analysis.HistogramAnalysis import HistogramAnalysis, LinearNumericBinDefinition, ModuloTimeBinDefinition, \
        PathDependentHistogramAnalysis
    modulo_time_bin_definition = ModuloTimeBinDefinition(
        86400, 3600, 0, 1, 24, True)
    linear_numeric_bin_definition = LinearNumericBinDefinition(50, 5, 20, True)
    histogram_analysis = HistogramAnalysis(
        analysis_context.aminer_config,
        [('/model/RandomTime/Random', modulo_time_bin_definition),
         ('/model/Random', linear_numeric_bin_definition)], 10,
        anomaly_event_handlers)
    analysis_context.register_component(histogram_analysis,
                                        component_name="HistogramAnalysis")
    atom_filter.add_handler(histogram_analysis)

    path_dependent_histogram_analysis = PathDependentHistogramAnalysis(
        analysis_context.aminer_config, '/model/RandomTime',
        modulo_time_bin_definition, 10, anomaly_event_handlers)
    analysis_context.register_component(
        path_dependent_histogram_analysis,
        component_name="PathDependentHistogramAnalysis")
    atom_filter.add_handler(path_dependent_histogram_analysis)

    from aminer.analysis.MatchValueAverageChangeDetector import MatchValueAverageChangeDetector
    match_value_average_change_detector = MatchValueAverageChangeDetector(
        analysis_context.aminer_config, anomaly_event_handlers, None,
        ['/model/Random'], 100, 10)
    analysis_context.register_component(
        match_value_average_change_detector,
        component_name="MatchValueAverageChange")
    atom_filter.add_handler(match_value_average_change_detector)

    import sys
    from aminer.analysis.MatchValueStreamWriter import MatchValueStreamWriter
    match_value_stream_writer = MatchValueStreamWriter(sys.stdout, [
        '/model/Sensors/CPUTemp', '/model/Sensors/CPUWorkload',
        '/model/Sensors/DTM'
    ], b';', b'')
    analysis_context.register_component(
        match_value_stream_writer, component_name="MatchValueStreamWriter")
    atom_filter.add_handler(match_value_stream_writer)

    from aminer.analysis.NewMatchPathValueComboDetector import NewMatchPathValueComboDetector
    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/IPAddresses/Username', '/model/IPAddresses/IP'],
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(
        new_match_path_value_combo_detector,
        component_name="NewMatchPathValueCombo")
    atom_filter.add_handler(new_match_path_value_combo_detector)

    from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector
    new_match_path_value_detector = NewMatchPathValueDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/JobNumber', '/model/IPAddresses/Username'],
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_value_detector,
                                        component_name="NewMatchPathValue")
    atom_filter.add_handler(new_match_path_value_detector)

    from aminer.analysis.MissingMatchPathValueDetector import MissingMatchPathValueDetector
    missing_match_path_value_detector = MissingMatchPathValueDetector(
        analysis_context.aminer_config, ['/model/DiskReport/Space'],
        anomaly_event_handlers,
        auto_include_flag=True,
        default_interval=2,
        realert_interval=5)
    analysis_context.register_component(missing_match_path_value_detector,
                                        component_name="MissingMatch")
    atom_filter.add_handler(missing_match_path_value_detector)

    from aminer.analysis.TimeCorrelationDetector import TimeCorrelationDetector
    time_correlation_detector = TimeCorrelationDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        2,
        min_rule_attributes=1,
        max_rule_attributes=5,
        record_count_before_event=70000,
        output_log_line=True)
    analysis_context.register_component(
        time_correlation_detector, component_name="TimeCorrelationDetector")
    atom_filter.add_handler(time_correlation_detector)

    from aminer.analysis.TimeCorrelationViolationDetector import TimeCorrelationViolationDetector, CorrelationRule, EventClassSelector
    cron_job_announcement = CorrelationRule(
        'CronJobAnnouncement',
        5,
        6,
        max_artefacts_a_for_single_b=1,
        artefact_match_parameters=[('/model/CronAnnouncement/JobNumber',
                                    '/model/CronExecution/JobNumber')])
    a_class_selector = EventClassSelector('Announcement',
                                          [cron_job_announcement], None)
    b_class_selector = EventClassSelector('Execution', None,
                                          [cron_job_announcement])
    rules = [
        Rules.PathExistsMatchRule('/model/CronAnnouncement/Run',
                                  a_class_selector),
        Rules.PathExistsMatchRule('/model/CronExecution/Job', b_class_selector)
    ]

    time_correlation_violation_detector = TimeCorrelationViolationDetector(
        analysis_context.aminer_config, rules, anomaly_event_handlers)
    analysis_context.register_component(
        time_correlation_violation_detector,
        component_name="TimeCorrelationViolationDetector")
    atom_filter.add_handler(time_correlation_violation_detector)