Example #1
0
def getModel():
    """This function defines how to parse messages from kernel logging."""

    typeChildren = []
    typeChildren.append(
        SequenceModelElement('ipv4-martian', [
            FixedDataModelElement('s0', b'IPv4: martian '),
            FixedWordlistDataModelElement('direction',
                                          [b'source', b'destination']),
            FixedDataModelElement('s1', b' '),
            IpAddressDataModelElement('destination'),
            FixedDataModelElement('s2', b' from '),
            IpAddressDataModelElement('source'),
            FixedDataModelElement('s3', b', on dev '),
            AnyByteDataModelElement('interface')
        ]))

    typeChildren.append(
        SequenceModelElement('net-llheader', [
            FixedDataModelElement('s0', b'll header: '),
            AnyByteDataModelElement('data')
        ]))

    typeChildren.append(AnyByteDataModelElement('unparsed'))

    model = SequenceModelElement('kernel', [
        FixedDataModelElement('sname', b'kernel: ['),
        DelimitedDataModelElement('timestamp', b']'),
        FixedDataModelElement('s0', b'] '),
        FirstMatchModelElement('msg', typeChildren)
    ])
    return model
def get_model():
    """Return a model to parse Apache Error logs from the AIT-LDS."""
    model = SequenceModelElement('model', [
        FixedDataModelElement('sp1', b'['),
        FixedWordlistDataModelElement('day', [b'Mon', b'Tue', b'Wed', b'Thu', b'Fri', b'Sat', b'Sun']),
        FixedDataModelElement('sp2', b' '),
        DateTimeModelElement('time', b'%b %d %H:%M:%S.%f %Y'),
        FixedDataModelElement('error_str', b'] [:error] [pid '),
        DecimalIntegerValueModelElement('pid'),
        FixedDataModelElement('sp3', b'] [client '),
        IpAddressDataModelElement('client_ip'),
        FixedDataModelElement('colon', b':'),
        DecimalIntegerValueModelElement('client_port'),
        FixedDataModelElement('php', b'] PHP '),
        FirstMatchModelElement('fphp', [
            SequenceModelElement('warning', [
                FixedDataModelElement('warning_str', b'Warning:  '),
                FirstMatchModelElement('warning', [
                    SequenceModelElement('declaration', [
                        FixedDataModelElement('declaration_str', b'Declaration of '),
                        DelimitedDataModelElement('function', b')'),
                        FixedDataModelElement('compatible_str', b') should be compatible with '),
                        DelimitedDataModelElement('function2', b')'),
                        FixedDataModelElement('compatible_str', b') in '),
                        DelimitedDataModelElement('path', b' '),
                        FixedDataModelElement('compatible_str', b' on line '),
                        DecimalIntegerValueModelElement('line'),
                        FixedDataModelElement('referer_str', b', referer: '),
                        AnyByteDataModelElement('referer')]),
                    SequenceModelElement('system', [
                        FixedDataModelElement('system_str', b'system(): Cannot execute a blank command in '),
                        DelimitedDataModelElement('path', b' '),
                        FixedDataModelElement('compatible_str', b' on line '),
                        DecimalIntegerValueModelElement('line')])])]),
            SequenceModelElement('notice', [
                FixedDataModelElement('notice_str', b'Notice:  Undefined index: '),
                DelimitedDataModelElement('command', b' '),
                FixedDataModelElement('sp', b' in '),
                DelimitedDataModelElement('path', b' '),
                FixedDataModelElement('compatible_str', b' on line '),
                DecimalIntegerValueModelElement('line')]),
            SequenceModelElement('deprecated', [
                FixedDataModelElement('deprecated_str', b'Deprecated:  Methods with the same name as their class '
                                      b'will not be constructors in a future version of PHP; '),
                DelimitedDataModelElement('class', b' '),
                FixedDataModelElement('constructor_str', b' has a deprecated constructor in '),
                DelimitedDataModelElement('path', b' '),
                FixedDataModelElement('compatible_str', b' on line '),
                DecimalIntegerValueModelElement('line'),
                FixedDataModelElement('referer_str', b', referer: '),
                AnyByteDataModelElement('referer'),
                ])])])

    return model
Example #3
0
def get_model():
    """This method returns the model."""
    type_children = [
        FixedDataModelElement(
            'warn-no-openat',
            b'WARNING: SECURITY: No secure open yet due to missing openat in python!'
        ),
        FixedDataModelElement(
            'warn-no-OPATH',
            b'WARNING: SECURITY: Open should use O_PATH, but not yet available in python'
        ),
        FixedDataModelElement(
            'warn-POSIX-acls',
            b'WARNING: SECURITY: No checking for backdoor access via \
          POSIX ACLs, use "getfacl" from "acl" package to check manually.'),
        FixedDataModelElement(
            'warn-no-linkat',
            b'WARNING: SECURITY: unsafe unlink (unavailable unlinkat/linkat \
          should be used, but not available in python)'),
        AnyByteDataModelElement('unparsed')
    ]

    model = SequenceModelElement('aminer', [
        FixedDataModelElement('sname', b'AMiner['),
        DecimalIntegerValueModelElement('pid'),
        FixedDataModelElement('s0', b']: '),
        FirstMatchModelElement('msg', type_children)
    ])
    return model
Example #4
0
def build_analysis_pipeline(analysis_context):
    """
    Define the function to create pipeline for parsing the log data.
    It has also to define an AtomizerFactory to instruct AMiner how to process incoming data streams to create log atoms from them.
    """
    # Build the parsing model:
    from aminer.parsing import AnyByteDataModelElement

    parsing_model = AnyByteDataModelElement('AnyByteDataModelElement')

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filter = AtomFilters.SubhandlerFilter(None)

    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    stream_printer_event_handler = StreamPrinterEventHandler(analysis_context)
    anomaly_event_handlers = [stream_printer_event_handler]

    # Now define the AtomizerFactory using the model. A simple line
    # based one is usually sufficient.
    from aminer.input import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(
        parsing_model, [atom_filter], anomaly_event_handlers)

    # Just report all unparsed atoms to the event handlers.
    from aminer.input import SimpleUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(
        anomaly_event_handlers)
    atom_filter.add_handler(simple_unparsed_atom_handler,
                            stop_when_handled_flag=True)
    analysis_context.register_component(simple_unparsed_atom_handler,
                                        component_name="UnparsedHandler")

    from aminer.analysis import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_detector,
                                        component_name="NewMatchPath")
    atom_filter.add_handler(new_match_path_detector)

    from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector
    new_match_path_value_detector = NewMatchPathValueDetector(
        analysis_context.aminer_config, ['/AnyByteDataModelElement'],
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_value_detector,
                                        component_name="NewMatchPathValue")
    atom_filter.add_handler(new_match_path_value_detector)
def get_model():
    """This method returns the model."""
    type_children = [
        FixedDataModelElement('start',
                              b' * Starting Tomcat servlet engine tomcat7'),
        FixedDataModelElement('stop',
                              b' * Stopping Tomcat servlet engine tomcat7'),
        FixedDataModelElement('done', b'   ...done.'),
        AnyByteDataModelElement('unparsed')
    ]

    model = SequenceModelElement('tomcat7', [
        FixedDataModelElement('sname', b'tomcat7['),
        DecimalIntegerValueModelElement('pid'),
        FixedDataModelElement('s0', b']: '),
        FirstMatchModelElement('msg', type_children)
    ])
    return model
Example #6
0
def get_model(user_name_model=None):
    """This function defines how to parse a cron message logged via syslog after any standard logging preamble, e.g. from syslog."""

    if user_name_model is None:
        user_name_model = VariableByteDataModelElement('user', b'0123456789abcdefghijklmnopqrstuvwxyz.-')

    type_children = [
        SequenceModelElement('exec', [
            FixedDataModelElement('s0', b'('),
            user_name_model,
            FixedDataModelElement('s1', b') CMD '),
            AnyByteDataModelElement('command')
        ]),
        SequenceModelElement('pam', [
            FixedDataModelElement('s0', b'pam_unix(cron:session): session '),
            FixedWordlistDataModelElement('change', [b'opened', b'closed']),
            FixedDataModelElement('s1', b' for user '),
            user_name_model,
            OptionalMatchModelElement('openby', FixedDataModelElement('default', b' by (uid=0)'))
        ])
    ]

    model = FirstMatchModelElement('cron', [
        SequenceModelElement('std', [
            FixedDataModelElement('sname', b'CRON['),
            DecimalIntegerValueModelElement('pid'),
            FixedDataModelElement('s0', b']: '),
            FirstMatchModelElement('msgtype', type_children)
        ]),
        SequenceModelElement('low', [
            FixedDataModelElement('sname', b'cron['),
            DecimalIntegerValueModelElement('pid'),
            FixedDataModelElement('s0', b']: (*system*'),
            DelimitedDataModelElement('rname', b') RELOAD ('),
            FixedDataModelElement('s1', b') RELOAD ('),
            DelimitedDataModelElement('fname', b')'),
            FixedDataModelElement('s2', b')'), ])
    ])
    return model
class EventTypeDetectorTest(TestBase):
    """Unittests for the EventTypeDetector."""

    log_lines = [
        b'type=SYSCALL msg=audit(1580367384.000:1): arch=c000003e syscall=1 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367385.000:1): item=0 name="one" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367386.000:2): arch=c000003e syscall=2 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367387.000:2): item=0 name="two" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367388.000:3): arch=c000003e syscall=3 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367389.000:3): item=0 name="three" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367388.500:100): arch=c000003e syscall=1 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1'
        b' ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=SYSCALL msg=audit(1580367390.000:4): arch=c000003e syscall=1 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367391.000:4): item=0 name="one" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=PATH msg=audit(1580367392.000:5): item=0 name="two" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367393.000:5): arch=c000003e syscall=2 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=SYSCALL msg=audit(1580367394.000:6): arch=c000003e syscall=4 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367395.000:7): item=0 name="five" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367396.000:8): arch=c000003e syscall=6 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367397.000:6): item=0 name="four" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367398.000:7): arch=c000003e syscall=5 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367399.000:8): item=0 name="six" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367400.000:9): arch=c000003e syscall=2 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367401.000:9): item=0 name="three" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=PATH msg=audit(1580367402.000:10): item=0 name="one" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367403.000:10): arch=c000003e syscall=3 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)'
    ]

    parsing_model = FirstMatchModelElement('type', [
        SequenceModelElement('path', [
            FixedDataModelElement('type', b'type=PATH '),
            FixedDataModelElement('msg_audit', b'msg=audit('),
            DelimitedDataModelElement('msg', b':'),
            FixedDataModelElement('placeholder', b':'),
            DecimalIntegerValueModelElement('id'),
            FixedDataModelElement('item_string', b'): item='),
            DecimalIntegerValueModelElement('item'),
            FixedDataModelElement('name_string', b' name="'),
            DelimitedDataModelElement('name', b'"'),
            FixedDataModelElement('inode_string', b'" inode='),
            DecimalIntegerValueModelElement('inode'),
            FixedDataModelElement('dev_string', b' dev='),
            DelimitedDataModelElement('dev', b' '),
            FixedDataModelElement('mode_string', b' mode='),
            DecimalIntegerValueModelElement('mode'),
            FixedDataModelElement('ouid_string', b' ouid='),
            DecimalIntegerValueModelElement('ouid'),
            FixedDataModelElement('ogid_string', b' ogid='),
            DecimalIntegerValueModelElement('ogid'),
            FixedDataModelElement('rdev_string', b' rdev='),
            DelimitedDataModelElement('rdev', b' '),
            FixedDataModelElement('nametype_string', b' nametype='),
            FixedWordlistDataModelElement('nametype', [b'NORMAL', b'ERROR'])
        ]),
        SequenceModelElement('syscall', [
            FixedDataModelElement('type', b'type=SYSCALL '),
            FixedDataModelElement('msg_audit', b'msg=audit('),
            DelimitedDataModelElement('msg', b':'),
            FixedDataModelElement('placeholder', b':'),
            DecimalIntegerValueModelElement('id'),
            FixedDataModelElement('arch_string', b'): arch='),
            DelimitedDataModelElement('arch', b' '),
            FixedDataModelElement('syscall_string', b' syscall='),
            DecimalIntegerValueModelElement('syscall'),
            FixedDataModelElement('success_string', b' success='),
            FixedWordlistDataModelElement('success', [b'yes', b'no']),
            FixedDataModelElement('exit_string', b' exit='),
            DecimalIntegerValueModelElement('exit'),
            AnyByteDataModelElement('remainding_data')
        ])
    ])

    def test1receive_atoms_with_default_values(self):
        """
        In this test case multiple log_atoms are received with default values of the EventTypeDetector.
        path_list is empty and all paths are learned dynamically in variable_key_list.
        """
        event_type_detector = EventTypeDetector(
            self.aminer_config, [self.stream_printer_event_handler])
        log_atoms = []
        for line in self.log_lines:
            t = time.time()
            log_atoms.append(
                LogAtom(
                    line,
                    ParserMatch(
                        self.parsing_model.get_match_element(
                            'parser', MatchContext(line))), t,
                    self.__class__.__name__))
        for i, log_atom in enumerate(log_atoms):
            self.assertTrue(event_type_detector.receive_atom(log_atom))
            self.assertEqual(event_type_detector.total_records, i + 1)

    def test2receive_atoms_with_defined_path_list(self):
        """
        In this test case multiple log_atoms are received with default values of the EventTypeDetector.
        path_list is set to a static list of paths and variable_key_list should not be used.
        """
        event_type_detector = EventTypeDetector(
            self.aminer_config, [self.stream_printer_event_handler],
            path_list=['parser/type/path/nametype'])
        results = [
            True, False, True, False, True, False, True, True, False, False,
            True, True, False, True, False, True, False, True, False, False,
            True
        ]
        log_atoms = []
        for line in self.log_lines:
            t = time.time()
            log_atoms.append(
                LogAtom(
                    line,
                    ParserMatch(
                        self.parsing_model.get_match_element(
                            'parser', MatchContext(line))), t,
                    self.__class__.__name__))
        for i, log_atom in enumerate(log_atoms):
            old_vals = (event_type_detector.num_events,
                        event_type_detector.num_eventlines,
                        event_type_detector.total_records,
                        event_type_detector.longest_path)
            self.assertEqual(event_type_detector.receive_atom(log_atom),
                             not results[i], i)
            if results[i]:
                self.assertEqual(old_vals, (event_type_detector.num_events,
                                            event_type_detector.num_eventlines,
                                            event_type_detector.total_records,
                                            event_type_detector.longest_path))

    def test3append_values_float(self):
        """This unittest checks the append_values method with raw_match_object being a float value."""
        event_type_detector = EventTypeDetector(
            self.aminer_config, [self.stream_printer_event_handler])
        # initialize all values.
        t = time.time()
        log_atom = LogAtom(
            b'22.2', ParserMatch(MatchElement('path', '22.2', 22.2, None)), t,
            self.__class__.__name__)
        event_type_detector.receive_atom(log_atom)

        event_type_detector.values = [[[]]]
        event_type_detector.append_values(log_atom, 0)
        self.assertEqual(event_type_detector.values, [[[22.2]]])

        log_atom = LogAtom(b'22',
                           ParserMatch(MatchElement('path', '22', 22, None)),
                           t, self.__class__.__name__)
        event_type_detector.values = [[[]]]
        event_type_detector.append_values(log_atom, 0)
        self.assertEqual(event_type_detector.values, [[[22]]])

        log_atom = LogAtom(
            b'22.2', ParserMatch(MatchElement('path', '22', b'22', None)), t,
            self.__class__.__name__)
        event_type_detector.values = [[[]]]
        event_type_detector.append_values(log_atom, 0)
        self.assertEqual(event_type_detector.values, [[[22]]])

    def test4append_values_bytestring(self):
        """
        This unittest checks the append_values method with raw_match_object being a bytestring.
        This should trigger a ValueError and append the match_string.
        """
        event_type_detector = EventTypeDetector(
            self.aminer_config, [self.stream_printer_event_handler])
        # initialize all values.
        t = time.time()
        log_atom = LogAtom(
            b'This is a string',
            ParserMatch(
                MatchElement('path', 'This is a string', b'This is a string',
                             None)), t, self.__class__.__name__)
        event_type_detector.receive_atom(log_atom)

        event_type_detector.values = [[[]]]
        event_type_detector.append_values(log_atom, 0)
        self.assertEqual(event_type_detector.values, [[['This is a string']]])

        log_atom = LogAtom(
            b'24.05.',
            ParserMatch(MatchElement('path', '24.05.', b'24.05.', None)), t,
            self.__class__.__name__)
        event_type_detector.values = [[[]]]
        event_type_detector.append_values(log_atom, 0)
        self.assertEqual(event_type_detector.values, [[['24.05.']]])

    def test5check_value_reduction(self):
        """This unittest checks the functionality of reducing the values when the maxNumVals threshold is reached."""
        event_type_detector = EventTypeDetector(
            self.aminer_config, [self.stream_printer_event_handler])
        t = time.time()
        val_list = [[[]]]
        for i in range(1, event_type_detector.max_num_vals + 1, 1):
            log_atom = LogAtom(
                str(i).encode(),
                ParserMatch(MatchElement('path', str(i), i, None)), t,
                self.__class__.__name__)
            val_list[0][0].append(float(i))
            self.assertTrue(event_type_detector.receive_atom(log_atom))
            self.assertEqual(event_type_detector.values, val_list)
        i += 1
        log_atom = LogAtom(
            str(i).encode(), ParserMatch(MatchElement('path', str(i), i,
                                                      None)), t,
            self.__class__.__name__)
        val_list[0][0].append(float(i))
        self.assertTrue(event_type_detector.receive_atom(log_atom))
        self.assertEqual(
            event_type_detector.values,
            [[val_list[0][0][-event_type_detector.min_num_vals:]]])

    def test6persist_and_load_data(self):
        """This unittest checks the functionality of the persistence by persisting and reloading values."""
        event_type_detector = EventTypeDetector(
            self.aminer_config, [self.stream_printer_event_handler])
        t = time.time()
        log_atom = LogAtom(
            b'22.2', ParserMatch(MatchElement('path', '22.2', 22.2, None)), t,
            self.__class__.__name__)
        event_type_detector.receive_atom(log_atom)
        event_type_detector.do_persist()
        event_type_detector_loaded = EventTypeDetector(
            self.aminer_config, [self.stream_printer_event_handler])
        self.assertEqual(event_type_detector.variable_key_list,
                         event_type_detector_loaded.variable_key_list)
        self.assertEqual(event_type_detector.values,
                         event_type_detector_loaded.values)
        self.assertEqual(event_type_detector.longest_path,
                         event_type_detector_loaded.longest_path)
        self.assertEqual(event_type_detector.check_variables,
                         event_type_detector_loaded.check_variables)
        self.assertEqual(event_type_detector.num_eventlines,
                         event_type_detector_loaded.num_eventlines)
Example #8
0
class NewMatchIdValueComboDetectorTest(TestBase):
    log_lines = [
        b'type=SYSCALL msg=audit(1580367384.000:1): arch=c000003e syscall=1 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367385.000:1): item=0 name="one" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367386.000:2): arch=c000003e syscall=2 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367387.000:2): item=0 name="two" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367388.000:3): arch=c000003e syscall=3 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367389.000:3): item=0 name="three" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367388.500:100): arch=c000003e syscall=1 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1'
        b' ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=SYSCALL msg=audit(1580367390.000:4): arch=c000003e syscall=1 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367391.000:4): item=0 name="one" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=PATH msg=audit(1580367392.000:5): item=0 name="two" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367393.000:5): arch=c000003e syscall=2 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=SYSCALL msg=audit(1580367394.000:6): arch=c000003e syscall=4 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367395.000:7): item=0 name="five" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367396.000:8): arch=c000003e syscall=6 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367397.000:6): item=0 name="four" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367398.000:7): arch=c000003e syscall=5 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367399.000:8): item=0 name="six" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367400.000:9): arch=c000003e syscall=2 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)',
        b'type=PATH msg=audit(1580367401.000:9): item=0 name="three" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=PATH msg=audit(1580367402.000:10): item=0 name="one" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
        b'nametype=NORMAL',
        b'type=SYSCALL msg=audit(1580367403.000:10): arch=c000003e syscall=3 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f items=1 '
        b'ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) ses=4294967295 '
        b'comm="apache2" exe="/usr/sbin/apache2" key=(null)']

    expected_whitelist_string = "Whitelisted path(es) parser/type/path/name, parser/type/syscall/syscall with %s in %s"

    parsing_model = FirstMatchModelElement('type', [SequenceModelElement('path', [
        FixedDataModelElement('type', b'type=PATH '), FixedDataModelElement('msg_audit', b'msg=audit('),
        DelimitedDataModelElement('msg', b':'), FixedDataModelElement('placeholder', b':'), DecimalIntegerValueModelElement('id'),
        FixedDataModelElement('item_string', b'): item='), DecimalIntegerValueModelElement('item'),
        FixedDataModelElement('name_string', b' name="'), DelimitedDataModelElement('name', b'"'),
        FixedDataModelElement('inode_string', b'" inode='), DecimalIntegerValueModelElement('inode'),
        FixedDataModelElement('dev_string', b' dev='), DelimitedDataModelElement('dev', b' '),
        FixedDataModelElement('mode_string', b' mode='), DecimalIntegerValueModelElement('mode'),
        FixedDataModelElement('ouid_string', b' ouid='), DecimalIntegerValueModelElement('ouid'),
        FixedDataModelElement('ogid_string', b' ogid='), DecimalIntegerValueModelElement('ogid'),
        FixedDataModelElement('rdev_string', b' rdev='), DelimitedDataModelElement('rdev', b' '),
        FixedDataModelElement('nametype_string', b' nametype='), FixedWordlistDataModelElement('nametype', [b'NORMAL', b'ERROR'])]),
        SequenceModelElement('syscall', [
            FixedDataModelElement('type', b'type=SYSCALL '), FixedDataModelElement('msg_audit', b'msg=audit('),
            DelimitedDataModelElement('msg', b':'), FixedDataModelElement('placeholder', b':'), DecimalIntegerValueModelElement('id'),
            FixedDataModelElement('arch_string', b'): arch='), DelimitedDataModelElement('arch', b' '),
            FixedDataModelElement('syscall_string', b' syscall='), DecimalIntegerValueModelElement('syscall'),
            FixedDataModelElement('success_string', b' success='), FixedWordlistDataModelElement('success', [b'yes', b'no']),
            FixedDataModelElement('exit_string', b' exit='), DecimalIntegerValueModelElement('exit'),
            AnyByteDataModelElement('remainding_data')])])

    def test1receive_match_in_time_with_auto_include_flag(self):
        """This test case checks if log_atoms are accepted as expected with the auto_include_flag=True."""
        description = 'test1newMatchIdValueComboDetectorTest'
        output_stream_empty_results = [True, False, True, False, True, False, True, True, True, True, True, True, True, True, False, False,
                                       False, True, False, True, False]
        id_dict_current_results = [
            {1: {'parser/type/syscall/syscall': 1}}, {}, {2: {'parser/type/syscall/syscall': 2}}, {},
            {3: {'parser/type/syscall/syscall': 3}}, {}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 4: {'parser/type/syscall/syscall': 1}}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 5: {'parser/type/path/name': 'two'}}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 6: {'parser/type/syscall/syscall': 4}},
            {100: {'parser/type/syscall/syscall': 1}, 6: {'parser/type/syscall/syscall': 4}, 7: {'parser/type/path/name': 'five'}},
            {100: {'parser/type/syscall/syscall': 1}, 6: {'parser/type/syscall/syscall': 4}, 7: {'parser/type/path/name': 'five'},
                8: {'parser/type/syscall/syscall': 6}},
            {100: {'parser/type/syscall/syscall': 1}, 7: {'parser/type/path/name': 'five'}, 8: {'parser/type/syscall/syscall': 6}},
            {100: {'parser/type/syscall/syscall': 1}, 8: {'parser/type/syscall/syscall': 6}}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 9: {'parser/type/syscall/syscall': 2}}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 10: {'parser/type/path/name': 'one'}}, {100: {'parser/type/syscall/syscall': 1}}]
        id_dict_old_results = [{}] * 21
        min_allowed_time_diff = 0.1
        log_atoms = []
        for line in self.log_lines:
            t = time.time()
            log_atoms.append(
                LogAtom(line, ParserMatch(self.parsing_model.get_match_element('parser', MatchContext(line))), t, self.__class__.__name__))
        new_match_id_value_combo_detector = NewMatchIdValueComboDetector(self.aminer_config, [
            'parser/type/path/name', 'parser/type/syscall/syscall'], [self.stream_printer_event_handler],
            id_path_list=['parser/type/path/id', 'parser/type/syscall/id'], min_allowed_time_diff=min_allowed_time_diff,
            auto_include_flag=True, allow_missing_values_flag=True, persistence_id='audit_type_path', output_log_line=False)
        self.analysis_context.register_component(new_match_id_value_combo_detector, description)

        for i, log_atom in enumerate(log_atoms):
            self.assertTrue(new_match_id_value_combo_detector.receive_atom(log_atom))
            self.assertEqual(self.output_stream.getvalue() == "", output_stream_empty_results[i], log_atom.raw_data)
            self.assertEqual(new_match_id_value_combo_detector.id_dict_current, id_dict_current_results[i])
            self.assertEqual(new_match_id_value_combo_detector.id_dict_old, id_dict_old_results[i])
            self.reset_output_stream()

    def test2receive_match_after_max_allowed_time_diff_with_auto_include_flag(self):
        """This test case checks if log_atoms are deleted after the maximal allowed time difference with the auto_include_flag=True."""
        description = 'test2newMatchIdValueComboDetectorTest'
        output_stream_empty_results = [True, False, True, False, True, False, True, True, True, True, True, True, True, True, False, False,
                                       False, True, False, True, False]
        id_dict_current_results = [
            {1: {'parser/type/syscall/syscall': 1}}, {}, {2: {'parser/type/syscall/syscall': 2}}, {},
            {3: {'parser/type/syscall/syscall': 3}}, {}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 4: {'parser/type/syscall/syscall': 1}}, {100: {'parser/type/syscall/syscall': 1}},
            {5: {'parser/type/path/name': 'two'}, 100: {'parser/type/syscall/syscall': 1}}, {}, {6: {'parser/type/syscall/syscall': 4}},
            {6: {'parser/type/syscall/syscall': 4}, 7: {'parser/type/path/name': 'five'}},
            {6: {'parser/type/syscall/syscall': 4}, 7: {'parser/type/path/name': 'five'}, 8: {'parser/type/syscall/syscall': 6}},
            {7: {'parser/type/path/name': 'five'}, 8: {'parser/type/syscall/syscall': 6}}, {}, {}, {9: {'parser/type/syscall/syscall': 2}},
            {}, {10: {'parser/type/path/name': 'one'}}, {}]
        id_dict_old_results = [{}] * 10 + [{100: {'parser/type/syscall/syscall': 1}}] * 5 + [{8: {'parser/type/syscall/syscall': 6}}] + [
            {}] * 5
        min_allowed_time_diff = 5
        log_atoms = []
        t = time.time()
        for line in self.log_lines:
            log_atoms.append(
                LogAtom(line, ParserMatch(self.parsing_model.get_match_element('parser', MatchContext(line))), t, self.__class__.__name__))
            t = t + min_allowed_time_diff * 0.25

        new_match_id_value_combo_detector = NewMatchIdValueComboDetector(self.aminer_config, [
            'parser/type/path/name', 'parser/type/syscall/syscall'], [self.stream_printer_event_handler],
            id_path_list=['parser/type/path/id', 'parser/type/syscall/id'], min_allowed_time_diff=min_allowed_time_diff,
            auto_include_flag=True, allow_missing_values_flag=True, persistence_id='audit_type_path', output_log_line=False)
        self.analysis_context.register_component(new_match_id_value_combo_detector, description)

        for i, log_atom in enumerate(log_atoms):
            self.assertTrue(new_match_id_value_combo_detector.receive_atom(log_atom))
            self.assertEqual(self.output_stream.getvalue() == "", output_stream_empty_results[i], log_atom.raw_data)
            self.assertEqual(new_match_id_value_combo_detector.id_dict_current, id_dict_current_results[i], log_atom.raw_data)
            self.assertEqual(new_match_id_value_combo_detector.id_dict_old, id_dict_old_results[i])
            self.reset_output_stream()

    def test3receive_match_in_time_without_auto_include_flag(self):
        """This test case checks if log_atoms are accepted as expected with the auto_include_flag=False."""
        description = 'test3newMatchIdValueComboDetectorTest'
        output_stream_empty_results = [True, False, True, False, True, False, True, True, False, True, False, True, True, True, False,
                                       False, False, True, False, True, False]
        id_dict_current_results = [
            {1: {'parser/type/syscall/syscall': 1}}, {}, {2: {'parser/type/syscall/syscall': 2}}, {},
            {3: {'parser/type/syscall/syscall': 3}}, {}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 4: {'parser/type/syscall/syscall': 1}}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 5: {'parser/type/path/name': 'two'}}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 6: {'parser/type/syscall/syscall': 4}},
            {100: {'parser/type/syscall/syscall': 1}, 6: {'parser/type/syscall/syscall': 4}, 7: {'parser/type/path/name': 'five'}},
            {100: {'parser/type/syscall/syscall': 1}, 6: {'parser/type/syscall/syscall': 4}, 7: {'parser/type/path/name': 'five'},
                8: {'parser/type/syscall/syscall': 6}},
            {100: {'parser/type/syscall/syscall': 1}, 7: {'parser/type/path/name': 'five'}, 8: {'parser/type/syscall/syscall': 6}},
            {100: {'parser/type/syscall/syscall': 1}, 8: {'parser/type/syscall/syscall': 6}}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 9: {'parser/type/syscall/syscall': 2}}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 10: {'parser/type/path/name': 'one'}}, {100: {'parser/type/syscall/syscall': 1}}]
        id_dict_old_results = [{}] * 21
        min_allowed_time_diff = 0.1
        log_atoms = []
        for line in self.log_lines:
            t = time.time()
            log_atoms.append(
                LogAtom(line, ParserMatch(self.parsing_model.get_match_element('parser', MatchContext(line))), t, self.__class__.__name__))
        new_match_id_value_combo_detector = NewMatchIdValueComboDetector(self.aminer_config, [
            'parser/type/path/name', 'parser/type/syscall/syscall'], [self.stream_printer_event_handler],
            id_path_list=['parser/type/path/id', 'parser/type/syscall/id'], min_allowed_time_diff=min_allowed_time_diff,
            auto_include_flag=False, allow_missing_values_flag=True, persistence_id='audit_type_path', output_log_line=False)
        self.analysis_context.register_component(new_match_id_value_combo_detector, description)

        for i, log_atom in enumerate(log_atoms):
            self.assertTrue(new_match_id_value_combo_detector.receive_atom(log_atom))
            self.assertEqual(self.output_stream.getvalue() == "", output_stream_empty_results[i], log_atom.raw_data)
            self.assertEqual(new_match_id_value_combo_detector.id_dict_current, id_dict_current_results[i])
            self.assertEqual(new_match_id_value_combo_detector.id_dict_old, id_dict_old_results[i])
            self.assertEqual(new_match_id_value_combo_detector.known_values, [])
            self.reset_output_stream()

    def test4receive_match_after_max_allowed_time_diff_without_auto_include_flag(self):
        """This test case checks if log_atoms are deleted after the maximal allowed time difference with the auto_include_flag=False."""
        description = 'test4newMatchIdValueComboDetectorTest'
        output_stream_empty_results = [True, False, True, False, True, False, True, True, False, True, False, True, True, True, False,
                                       False, False, True, False, True, False]
        id_dict_current_results = [
            {1: {'parser/type/syscall/syscall': 1}}, {}, {2: {'parser/type/syscall/syscall': 2}}, {},
            {3: {'parser/type/syscall/syscall': 3}}, {}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 4: {'parser/type/syscall/syscall': 1}}, {100: {'parser/type/syscall/syscall': 1}},
            {5: {'parser/type/path/name': 'two'}, 100: {'parser/type/syscall/syscall': 1}}, {}, {6: {'parser/type/syscall/syscall': 4}},
            {6: {'parser/type/syscall/syscall': 4}, 7: {'parser/type/path/name': 'five'}},
            {6: {'parser/type/syscall/syscall': 4}, 7: {'parser/type/path/name': 'five'}, 8: {'parser/type/syscall/syscall': 6}},
            {7: {'parser/type/path/name': 'five'}, 8: {'parser/type/syscall/syscall': 6}}, {}, {}, {9: {'parser/type/syscall/syscall': 2}},
            {}, {10: {'parser/type/path/name': 'one'}}, {}]
        id_dict_old_results = [{}] * 10 + [{100: {'parser/type/syscall/syscall': 1}}] * 5 + [{8: {'parser/type/syscall/syscall': 6}}] + [
            {}] * 5
        min_allowed_time_diff = 5
        log_atoms = []
        t = time.time()
        for line in self.log_lines:
            log_atoms.append(
                LogAtom(line, ParserMatch(self.parsing_model.get_match_element('parser', MatchContext(line))), t, self.__class__.__name__))
            t = t + min_allowed_time_diff * 0.25

        new_match_id_value_combo_detector = NewMatchIdValueComboDetector(self.aminer_config, [
            'parser/type/path/name', 'parser/type/syscall/syscall'], [self.stream_printer_event_handler],
            id_path_list=['parser/type/path/id', 'parser/type/syscall/id'], min_allowed_time_diff=min_allowed_time_diff,
            auto_include_flag=False, allow_missing_values_flag=True, persistence_id='audit_type_path', output_log_line=False)
        self.analysis_context.register_component(new_match_id_value_combo_detector, description)

        for i, log_atom in enumerate(log_atoms):
            self.assertTrue(new_match_id_value_combo_detector.receive_atom(log_atom))
            self.assertEqual(self.output_stream.getvalue() == "", output_stream_empty_results[i], log_atom.raw_data)
            self.assertEqual(new_match_id_value_combo_detector.id_dict_current, id_dict_current_results[i], log_atom.raw_data)
            self.assertEqual(new_match_id_value_combo_detector.id_dict_old, id_dict_old_results[i])
            self.assertEqual(new_match_id_value_combo_detector.known_values, [])
            self.reset_output_stream()

    def test5whitelist_unknown_target_path(self):
        """This test case checks if a unknown target path can be added to the known_values with the whitelist_event method."""
        description = 'test5newMatchIdValueComboDetectorTest'
        min_allowed_time_diff = 5
        new_match_id_value_combo_detector = NewMatchIdValueComboDetector(self.aminer_config, [
            'parser/type/path/name', 'parser/type/syscall/syscall'], [self.stream_printer_event_handler],
            id_path_list=['parser/type/path/id', 'parser/type/syscall/id'], min_allowed_time_diff=min_allowed_time_diff,
            auto_include_flag=False, allow_missing_values_flag=True, persistence_id='audit_type_path', output_log_line=False)
        self.analysis_context.register_component(new_match_id_value_combo_detector, description)
        self.assertEqual(new_match_id_value_combo_detector.known_values, [])
        sorted_log_lines = [self.log_lines[1]]
        event_data = [self.log_lines[1], {'parser/type/syscall/syscall': 1, 'parser/type/path/name': 'one'}]
        output = new_match_id_value_combo_detector.whitelist_event('Analysis.%s' % new_match_id_value_combo_detector.__class__.__name__,
                                                                   sorted_log_lines, event_data, None)
        self.assertEqual(new_match_id_value_combo_detector.known_values, [
            {'parser/type/syscall/syscall': 1, 'parser/type/path/name': 'one'}])
        self.assertEqual(output, self.expected_whitelist_string % (event_data[1], self.log_lines[1]))

        sorted_log_lines = [self.log_lines[3]]
        event_data = [self.log_lines[3], {'parser/type/syscall/syscall': 2, 'parser/type/path/name': 'two'}]
        output = new_match_id_value_combo_detector.whitelist_event('Analysis.%s' % new_match_id_value_combo_detector.__class__.__name__,
                                                                   sorted_log_lines, event_data, None)
        self.assertEqual(new_match_id_value_combo_detector.known_values, [
            {'parser/type/syscall/syscall': 1, 'parser/type/path/name': 'one'},
            {'parser/type/syscall/syscall': 2, 'parser/type/path/name': 'two'}])
        self.assertEqual(output, self.expected_whitelist_string % (event_data[1], self.log_lines[3]))

    def test6whitelist_known_target_path(self):
        """This test case checks if a known target path is not added twice to the known_values with the whitelist_event method."""
        description = 'test6newMatchIdValueComboDetectorTest'
        min_allowed_time_diff = 5
        new_match_id_value_combo_detector = NewMatchIdValueComboDetector(self.aminer_config, [
            'parser/type/path/name', 'parser/type/syscall/syscall'], [self.stream_printer_event_handler],
            id_path_list=['parser/type/path/id', 'parser/type/syscall/id'], min_allowed_time_diff=min_allowed_time_diff,
            auto_include_flag=False, allow_missing_values_flag=True, persistence_id='audit_type_path', output_log_line=False)
        self.analysis_context.register_component(new_match_id_value_combo_detector, description)
        self.assertEqual(new_match_id_value_combo_detector.known_values, [])
        sorted_log_lines = [self.log_lines[1]]
        event_data = [self.log_lines[1], {'parser/type/syscall/syscall': 1, 'parser/type/path/name': 'one'}]
        output = new_match_id_value_combo_detector.whitelist_event('Analysis.%s' % new_match_id_value_combo_detector.__class__.__name__,
                                                                   sorted_log_lines, event_data, None)
        self.assertEqual(new_match_id_value_combo_detector.known_values,
                         [{'parser/type/syscall/syscall': 1, 'parser/type/path/name': 'one'}])
        self.assertEqual(output, self.expected_whitelist_string % (event_data[1], self.log_lines[1]))

        sorted_log_lines = [self.log_lines[1]]
        event_data = [self.log_lines[1], {'parser/type/syscall/syscall': 1, 'parser/type/path/name': 'one'}]
        output = new_match_id_value_combo_detector.whitelist_event('Analysis.%s' % new_match_id_value_combo_detector.__class__.__name__,
                                                                   sorted_log_lines, event_data, None)
        self.assertEqual(new_match_id_value_combo_detector.known_values,
                         [{'parser/type/syscall/syscall': 1, 'parser/type/path/name': 'one'}])
        self.assertEqual(output, self.expected_whitelist_string % (event_data[1], self.log_lines[1]))
def get_model():
    """Return a model to parse Syslogs from the AIT-LDS."""
    alphabet = b'!"#$%&\'()*+,-./0123456789:;<>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\\^_`abcdefghijklmnopqrstuvwxyz{|}~=[]'

    user_info = SequenceModelElement('user_info', [
        FixedDataModelElement('user_str', b'user=<'),
        OptionalMatchModelElement(
            'user', DelimitedDataModelElement('user', b'>')
            ),
        FixedDataModelElement('method_str', b'>'),
        OptionalMatchModelElement(
            'method', SequenceModelElement('method', [
                FixedDataModelElement('method_str', b', method='),
                DelimitedDataModelElement('method', b','),
                ])
            ),
        FixedDataModelElement('rip_str', b', rip='),
        IpAddressDataModelElement('rip'),
        FixedDataModelElement('lip_str', b', lip='),
        IpAddressDataModelElement('lip'),
        OptionalMatchModelElement(
            'mpid', SequenceModelElement('mpid', [
                FixedDataModelElement('mpid_str', b', mpid='),
                DecimalIntegerValueModelElement('mpid'),
                ])
            ),
        OptionalMatchModelElement(
            'secured', FixedDataModelElement('secured_str', b', secured')
            ),
        FixedDataModelElement('session_str', b', session=<'),
        DelimitedDataModelElement('session', b'>'),
        FixedDataModelElement('bracket_str', b'>'),
        ])

    model = SequenceModelElement('model', [
        DateTimeModelElement('time', b'%b %d %H:%M:%S'),
        FixedDataModelElement('sp1', b' '),
        DelimitedDataModelElement('host', b' '),
        FirstMatchModelElement('service', [
            SequenceModelElement('dovecot', [
                FixedDataModelElement('dovecot_str', b' dovecot: '),
                FirstMatchModelElement('imap', [
                    SequenceModelElement('imap', [
                        FixedDataModelElement('imap_str', b'imap('),
                        DelimitedDataModelElement('user', b')'),
                        FixedDataModelElement('bracket_str', b'): '),
                        FirstMatchModelElement('message', [
                            SequenceModelElement('logout', [
                                FixedDataModelElement('logout_str', b'Logged out in='),
                                DecimalIntegerValueModelElement('in'),
                                FixedDataModelElement('out_str', b' out='),
                                DecimalIntegerValueModelElement('out')
                                ]),
                            SequenceModelElement('err_mail', [
                                FixedDataModelElement('mail_str', b'Error: Failed to autocreate mailbox INBOX: Internal error occurred. '
                                                      b'Refer to server log for more information. ['),
                                DelimitedDataModelElement('err_time', b']'),
                                FixedDataModelElement('brack', b']')
                                ]),
                            SequenceModelElement('err_open', [
                                FixedDataModelElement('err_str', b'Error: '),
                                DelimitedDataModelElement('function_name', b'('),
                                FixedDataModelElement('brack_str1', b'('),
                                DelimitedDataModelElement('arg', b')'),
                                FixedDataModelElement('failed_str', b') failed: Permission denied (euid='),
                                DecimalIntegerValueModelElement('euid'),
                                FixedDataModelElement('brack_str2', b'('),
                                DelimitedDataModelElement('euid_user', b')'),
                                FixedDataModelElement('egid_str', b') egid='),
                                DecimalIntegerValueModelElement('egid'),
                                FixedDataModelElement('brack_str3', b'('),
                                DelimitedDataModelElement('egid_user', b')'),
                                FixedDataModelElement('perm_str', b') missing +w perm: '),
                                DelimitedDataModelElement('mail_path', b','),
                                FixedDataModelElement('group_str', b', we\'re not in group '),
                                DecimalIntegerValueModelElement('group_id'),
                                FixedDataModelElement('brack_str4', b'('),
                                DelimitedDataModelElement('group_name', b')'),
                                FixedDataModelElement('owned_str', b'), dir owned by '),
                                DelimitedDataModelElement('owner', b' '),
                                FixedDataModelElement('mode_str', b' mode='),
                                DelimitedDataModelElement('mode', b')'),
                                FixedDataModelElement('brack_str5', b')'),
                                OptionalMatchModelElement(
                                    'set', SequenceModelElement('set', [
                                        FixedDataModelElement('set_str', b' (set'),
                                        DelimitedDataModelElement('param', b'='),
                                        FixedDataModelElement('equal_str', b'='),
                                        DelimitedDataModelElement('val', b')'),
                                        FixedDataModelElement('brack_str6', b')')
                                        ])
                                    )
                                ]),
                            SequenceModelElement('err_mail', [
                                FixedDataModelElement('mail_str', b'Failed to autocreate mailbox INBOX: Internal error occurred. '
                                                      b'Refer to server log for more information. ['),
                                DelimitedDataModelElement('err_time', b']'),
                                FixedDataModelElement('brack', b']')
                                ]),
                            ]),
                        ]),
                    SequenceModelElement('imap_login', [
                        FixedDataModelElement('imap_login_str', b'imap-login: '******'login', [
                            SequenceModelElement('disconnected_str', [
                                FixedDataModelElement('disconnected_str', b'Disconnected '),
                                FirstMatchModelElement('auth', [
                                    SequenceModelElement('auth_failed', [
                                        FixedDataModelElement('auth_failed_str', b'(auth failed, '),
                                        DecimalIntegerValueModelElement('attempts'),
                                        FixedDataModelElement('attempts_str', b' attempts in '),
                                        ]),
                                    FixedDataModelElement('no_auth_str', b'(no auth attempts in '),
                                    FixedDataModelElement('no_auth_str', b'(disconnected before auth was ready, waited '),
                                    ]),
                                DecimalIntegerValueModelElement('duration'),
                                FixedDataModelElement('secs_str', b' secs): '),
                                user_info
                                ]),
                            SequenceModelElement('login', [
                                FixedDataModelElement('login_str', b'Login: '******'anvil', [
                                FixedDataModelElement('anvil_str', b'Error: anvil:'),
                                AnyByteDataModelElement('anvil_msg')
                                ]),
                            SequenceModelElement('auth_responding', [
                                FixedDataModelElement('auth_responding_str', b'Warning: Auth process not responding, '
                                                      b'delayed sending initial response (greeting): '),
                                user_info
                                ]),
                            ]),
                        ]),
                    SequenceModelElement('auth', [
                        FixedDataModelElement('auth_worker_str', b'auth: '),
                        AnyByteDataModelElement('message')
                        ]),
                    SequenceModelElement('auth_worker', [
                        FixedDataModelElement('auth_worker_str', b'auth-worker('),
                        DecimalIntegerValueModelElement('pid'),
                        FixedDataModelElement('brack', b'):'),
                        AnyByteDataModelElement('message')
                        ]),
                    SequenceModelElement('master', [
                        FixedDataModelElement('master_str', b'master: '),
                        AnyByteDataModelElement('message')
                        ])
                    ]),
                ]),
            SequenceModelElement('horde', [
                FixedDataModelElement('horde_str', b' HORDE: '),
                FirstMatchModelElement('horde', [
                    SequenceModelElement('imp', [
                        FixedDataModelElement('succ_str', b'[imp] '),
                        FirstMatchModelElement('imp', [
                            SequenceModelElement('login', [
                                FixedDataModelElement('succ_str', b'Login success for '),
                                DelimitedDataModelElement('user', b' '),
                                FixedDataModelElement('brack_str1', b' ('),
                                IpAddressDataModelElement('ip'),
                                FixedDataModelElement('to_str', b') to {'),
                                DelimitedDataModelElement('imap_addr', b'}'),
                                FixedDataModelElement('brack_str2', b'}'),
                                ]),
                            SequenceModelElement('message_sent', [
                                FixedDataModelElement('message_sent_str', b'Message sent to '),
                                VariableByteDataModelElement('user', b'.0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz'),
                                OptionalMatchModelElement(
                                    'mail', SequenceModelElement('mail', [
                                        FixedDataModelElement('brack_mail1', b' <'),
                                        DelimitedDataModelElement('mail', b'>'),
                                        FixedDataModelElement('brack_mail2', b'>')
                                        ])
                                    ),
                                RepeatedElementDataModelElement(
                                    'more_recepients_rep', SequenceModelElement('more_recepients', [
                                        FixedDataModelElement('comma_str', b', '),
                                        VariableByteDataModelElement('more_recepients_mail',
                                                                     b'.0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz'),
                                        OptionalMatchModelElement(
                                            'more_recepients_mail', SequenceModelElement('more_recepients', [
                                                FixedDataModelElement('brack_more_recepients1', b' <'),
                                                DelimitedDataModelElement('more_recepients', b'>'),
                                                FixedDataModelElement('brack_more_recepients2', b'>')
                                                ])
                                            ),
                                        ])
                                    ),
                                FixedDataModelElement('from_str', b' from '),
                                DelimitedDataModelElement('user', b' '),
                                FixedDataModelElement('brack_str1', b' ('),
                                IpAddressDataModelElement('ip'),
                                FixedDataModelElement('brack_str2', b')'),
                                ]),
                            SequenceModelElement('login_failed', [
                                FixedDataModelElement('succ_str', b'FAILED LOGIN for '),
                                DelimitedDataModelElement('user', b' '),
                                FixedDataModelElement('brack_str1', b' ('),
                                IpAddressDataModelElement('ip'),
                                FixedDataModelElement('to_str', b') to {'),
                                DelimitedDataModelElement('imap_addr', b'}'),
                                FixedDataModelElement('brack_str2', b'}'),
                                ]),
                            SequenceModelElement('status', [
                                FixedDataModelElement('status_str', b'[status] Could not open mailbox "INBOX".'),
                                ]),
                            SequenceModelElement('sync_token', [
                                FixedDataModelElement('sync_token_str', b'[getSyncToken] IMAP error reported by server.'),
                                ]),
                            SequenceModelElement('auth_failed', [
                                FixedDataModelElement('auth_failed_str', b'[login] Authentication failed.'),
                                ]),
                            ]),
                        ]),
                    SequenceModelElement('horde', [
                        FixedDataModelElement('succ_str', b'[horde] '),
                        FirstMatchModelElement('horde', [
                            SequenceModelElement('success', [
                                FixedDataModelElement('success_str', b'Login success for '),
                                DelimitedDataModelElement('user', b' '),
                                FixedDataModelElement('brack_str1', b' to horde ('),
                                IpAddressDataModelElement('ip'),
                                FixedDataModelElement('brack_str2', b')'),
                                ]),
                            SequenceModelElement('success', [
                                FixedDataModelElement('success_str', b'User '),
                                DelimitedDataModelElement('user', b' '),
                                FixedDataModelElement('brack_str1', b' logged out of Horde ('),
                                IpAddressDataModelElement('ip'),
                                FixedDataModelElement('brack_str2', b')'),
                                ]),
                            SequenceModelElement('login_failed', [
                                FixedDataModelElement('failed_str', b'FAILED LOGIN for '),
                                DelimitedDataModelElement('user', b' '),
                                FixedDataModelElement('to_horde_str', b' to horde ('),
                                IpAddressDataModelElement('ip'),
                                FixedDataModelElement('brack_str', b')'),
                                ]),
                            ])
                        ]),
                    SequenceModelElement('function', [
                        FixedWordlistDataModelElement('horde_function', [b'[nag]', b'[turba]', b'[horde]']),
                        FixedDataModelElement('nag_str', b' PHP ERROR: '),
                        FirstMatchModelElement('php_error', [
                            SequenceModelElement('declaration', [
                                FixedDataModelElement('declaration_str', b'Declaration of '),
                                DelimitedDataModelElement('function_name1', b'('),
                                FixedDataModelElement('brack_str1', b'('),
                                OptionalMatchModelElement(
                                    'arg1', DelimitedDataModelElement('arg1', b')')
                                    ),
                                FixedDataModelElement('failed_str', b') should be compatible with '),
                                DelimitedDataModelElement('function_name2', b'('),
                                FixedDataModelElement('brack_str2', b'('),
                                OptionalMatchModelElement(
                                    'arg2', DelimitedDataModelElement('arg2', b')')
                                    ),
                                FixedDataModelElement('brack_str3', b')'),
                                ]),
                            FixedDataModelElement('file_str', b'finfo_file(): Empty filename or path'),
                            FixedDataModelElement('header_str', b'Cannot modify header information - headers already sent')
                            ])
                        ]),
                    SequenceModelElement('guest', [
                        FixedDataModelElement('guest_str', b'Guest user is not authorized for Horde (Host: '),
                        IpAddressDataModelElement('ip'),
                        FixedDataModelElement('brack_str', b').')
                        ]),
                    SequenceModelElement('php_error', [
                        FixedDataModelElement('php_error_str', b'PHP ERROR: '),
                        DelimitedDataModelElement('msg', b' ['),
                        ]),
                    SequenceModelElement('free_msg', [
                        DelimitedDataModelElement('msg', b' ['),
                        ])
                    ]),
                FixedDataModelElement('to_str', b' [pid '),
                DecimalIntegerValueModelElement('pid'),
                FixedDataModelElement('line_str', b' on line '),
                DecimalIntegerValueModelElement('line'),
                FixedDataModelElement('of_str', b' of "'),
                DelimitedDataModelElement('path', b'"'),
                FixedDataModelElement('brack_str', b'"]')
                ]),
            SequenceModelElement('cron', [
                FixedDataModelElement('cron_str', b' CRON['),
                DecimalIntegerValueModelElement('pid'),
                FixedDataModelElement('brack_str1', b']: '),
                FirstMatchModelElement('cron', [
                    SequenceModelElement('cmd', [
                        FixedDataModelElement('brack_str', b'('),
                        DelimitedDataModelElement('user', b')'),
                        FixedDataModelElement('cmd_str', b') CMD '),
                        AnyByteDataModelElement('cmd_msg')
                        ]),
                    SequenceModelElement('session', [  # This only occurs in auth.log
                        DelimitedDataModelElement('pam', b'('),
                        FixedDataModelElement('brack_str', b'('),
                        DelimitedDataModelElement('name', b')'),
                        FixedDataModelElement('session_str', b'): session '),
                        FixedWordlistDataModelElement('status', [b'opened', b'closed']),
                        FixedDataModelElement('user_str', b' for user '),
                        VariableByteDataModelElement('user', alphabet),
                        OptionalMatchModelElement(
                            'uid', SequenceModelElement('uid', [
                                FixedDataModelElement('uid_str', b' by (uid='),
                                DecimalIntegerValueModelElement('uid'),
                                FixedDataModelElement('brack_str', b')')
                                ])
                            )
                        ])
                    ])
                ]),
            SequenceModelElement('auth', [  # This only occurs in auth.log
                FixedDataModelElement('auth_str', b' auth: '),
                DelimitedDataModelElement('pam', b'('),
                FixedDataModelElement('brack_str', b'('),
                DelimitedDataModelElement('name', b')'),
                FixedDataModelElement('session_str', b'): authentication failure; logname='),
                OptionalMatchModelElement(
                    'logname', DelimitedDataModelElement('logname', b' ')
                    ),
                FixedDataModelElement('uid_str', b' uid='),
                DecimalIntegerValueModelElement('uid'),
                FixedDataModelElement('euid_str', b' euid='),
                DecimalIntegerValueModelElement('euid'),
                FixedDataModelElement('tty_str', b' tty='),
                DelimitedDataModelElement('tty', b' '),
                FixedDataModelElement('ruser_str', b' ruser='******'ruser', b' '),
                FixedDataModelElement('rhost_str', b' rhost='),
                IpAddressDataModelElement('rhost'),
                OptionalMatchModelElement(
                    'user', SequenceModelElement('user', [
                        FixedDataModelElement('user_str', b'  user='******'user', alphabet)
                        ])
                    )
                ]),
            SequenceModelElement('systemd', [
                FixedDataModelElement('systemd_str', b' systemd['),
                DecimalIntegerValueModelElement('pid'),
                FixedDataModelElement('brack_str1', b']: '),
                FirstMatchModelElement('systemd', [
                    FixedDataModelElement('php_starting_str', b'Starting Clean php session files...'),
                    FixedDataModelElement('php_started_str', b'Started Clean php session files.'),
                    FixedDataModelElement('php_starting_str', b'Starting Cleanup of Temporary Directories...'),
                    FixedDataModelElement('php_started_str', b'Started Cleanup of Temporary Directories.'),
                    FixedDataModelElement('php_started_str', b'Starting Daily apt upgrade and clean activities...'),
                    FixedDataModelElement('php_started_str', b'Started Daily apt upgrade and clean activities.'),
                    FixedDataModelElement('php_started_str', b'Starting Daily apt download activities...'),
                    FixedDataModelElement('php_started_str', b'Started Daily apt download activities.'),
                    FixedDataModelElement('php_started_str', b'Starting Security Auditing Service...'),
                    FixedDataModelElement('php_started_str', b'Started Security Auditing Service.'),
                    FixedDataModelElement('php_started_str', b'Stopping Security Auditing Service...'),
                    FixedDataModelElement('php_started_str', b'Stopped Security Auditing Service.'),
                    FixedDataModelElement('php_started_str', b'Reloading The Apache HTTP Server.'),
                    FixedDataModelElement('php_started_str', b'Reloaded The Apache HTTP Server.'),
                    FixedDataModelElement('php_started_str', b'Mounting Arbitrary Executable File Formats File System...'),
                    FixedDataModelElement('php_started_str', b'Mounted Arbitrary Executable File Formats File System.'),
                    SequenceModelElement('apt', [
                        FixedDataModelElement('apt_str', b'apt'),
                        AnyByteDataModelElement('apt_msg')
                        ]),
                    SequenceModelElement('service', [
                        DelimitedDataModelElement('service', b':'),
                        FixedDataModelElement('col_str', b': '),
                        AnyByteDataModelElement('_msg')
                        ])
                    ]),
                ]),
            SequenceModelElement('kernel', [
                FixedDataModelElement('kernel_str', b' kernel'),
                OptionalMatchModelElement(
                    'id', SequenceModelElement('id', [
                        FixedDataModelElement('brack_str', b'['),
                        DecimalIntegerValueModelElement('id'),
                        FixedDataModelElement('brack_str2', b']')
                        ])
                    ),
                FixedDataModelElement('col_str', b': '),
                AnyByteDataModelElement('kernel_msg')
                ]),
            SequenceModelElement('augenrules', [
                FixedDataModelElement('augenrules_str', b' augenrules['),
                DecimalIntegerValueModelElement('id'),
                FixedDataModelElement('brack_str1', b']: '),
                AnyByteDataModelElement('augenrules_msg')
                ]),
            SequenceModelElement('auditd', [
                FixedDataModelElement('auditd_str', b' auditd['),
                DecimalIntegerValueModelElement('id'),
                FixedDataModelElement('brack_str1', b']: '),
                AnyByteDataModelElement('auditd_msg')
                ]),
            SequenceModelElement('auditd2', [
                FixedDataModelElement('auditd2_str', b' auditd: '),
                AnyByteDataModelElement('auditd_msg')
                ]),
            SequenceModelElement('audispd', [
                FixedDataModelElement('audispd_str', b' audispd: '),
                AnyByteDataModelElement('audispd_msg')
                ]),
            SequenceModelElement('liblogging', [
                FixedDataModelElement('liblogging_str', b' liblogging-stdlog: '),
                AnyByteDataModelElement('liblogging_msg')
                ]),
            SequenceModelElement('freshclam', [
                FixedDataModelElement('freshclam_str', b' freshclam['),
                DecimalIntegerValueModelElement('id'),
                FixedDataModelElement('brack_str1', b']: '),
                AnyByteDataModelElement('freshclam_msg')
                ]),
            SequenceModelElement('dhclient', [
                FixedDataModelElement('dhclient_str', b' dhclient['),
                DecimalIntegerValueModelElement('id'),
                FixedDataModelElement('brack_str1', b']: '),
                FirstMatchModelElement('dhclient', [
                    SequenceModelElement('dhcprequest', [
                        FixedDataModelElement('dhcprequest_str', b'DHCPREQUEST of '),
                        IpAddressDataModelElement('src_ip'),
                        FixedDataModelElement('on_str', b' on '),
                        DelimitedDataModelElement('network_interface', b' '),
                        FixedDataModelElement('to_str', b' to '),
                        IpAddressDataModelElement('dst_ip'),
                        FixedDataModelElement('port_str', b' port '),
                        DecimalIntegerValueModelElement('port')
                    ]),
                    SequenceModelElement('dhcpack', [
                        FixedDataModelElement('dhcpack_str', b'DHCPACK of '),
                        IpAddressDataModelElement('dst_ip'),
                        FixedDataModelElement('on_str', b' from '),
                        IpAddressDataModelElement('src_ip')
                    ]),
                    SequenceModelElement('bound', [
                        FixedDataModelElement('bound_str', b'bound to '),
                        IpAddressDataModelElement('ip'),
                        FixedDataModelElement('renewal_str', b' -- renewal in '),
                        DecimalIntegerValueModelElement('seconds'),
                        FixedDataModelElement('seconds_str', b' seconds.')
                        ]),
                    ]),
                ]),
            ])
        ])

    return model
Example #10
0
def build_analysis_pipeline(analysis_context):
    """
    Define the function to create pipeline for parsing the log data.
    It has also to define an AtomizerFactory to instruct AMiner how to process incoming data streams to create log atoms from them.
    """
    date_format_string = b'%Y-%m-%d %H:%M:%S'
    cron = b' cron['

    # Build the parsing model:

    service_children_disk_report = [
        FixedDataModelElement(
            'Space',
            b' Current Disk Data is: Filesystem     Type  Size  Used Avail Use%'
        ),
        DelimitedDataModelElement('Data', b'%'),
        AnyByteDataModelElement('Rest')
    ]

    service_children_login_details = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedWordlistDataModelElement('Status',
                                      [b' logged in', b' logged out']),
        OptionalMatchModelElement(
            'PastTime',
            SequenceModelElement('Time', [
                FixedDataModelElement('Blank', b' '),
                DecimalIntegerValueModelElement('Minutes'),
                FixedDataModelElement('Ago', b' minutes ago.')
            ]))
    ]

    service_children_cron_job = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('UNameSpace1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('UNameSpace2', b' '),
        DelimitedDataModelElement('User', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Details', b']: Job `cron.daily` started.')
    ]

    service_children_random_time = [
        FixedDataModelElement('Space', b'Random: '),
        DecimalIntegerValueModelElement('Random')
    ]

    service_children_sensors = [
        SequenceModelElement('CPUTemp', [
            FixedDataModelElement('FixedTemp', b'CPU Temp: '),
            DecimalIntegerValueModelElement('Temp'),
            FixedDataModelElement('Degrees', b'\xc2\xb0C')
        ]),
        FixedDataModelElement('Space1', b', '),
        SequenceModelElement('CPUWorkload', [
            FixedDataModelElement('FixedWorkload', b'CPU Workload: '),
            DecimalIntegerValueModelElement('Workload'),
            FixedDataModelElement('Percent', b'%')
        ]),
        FixedDataModelElement('Space2', b', '),
        DateTimeModelElement('DTM', date_format_string)
    ]

    service_children_user_ip_address = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedDataModelElement('Action', b' changed IP address to '),
        IpAddressDataModelElement('IP')
    ]

    service_children_cron_job_announcement = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('Space', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Run', b']: Will run job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Start Time', b'\' in 5 min.')
    ]

    service_children_cron_job_execution = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('Space1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Job', b']: Job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Started', b'\' started')
    ]

    parsing_model = FirstMatchModelElement('model', [
        SequenceModelElement('CronAnnouncement',
                             service_children_cron_job_announcement),
        SequenceModelElement('CronExecution',
                             service_children_cron_job_execution),
        SequenceModelElement('DailyCron', service_children_cron_job),
        SequenceModelElement('DiskReport', service_children_disk_report),
        SequenceModelElement('LoginDetails', service_children_login_details),
        DecimalIntegerValueModelElement('Random'),
        SequenceModelElement('RandomTime', service_children_random_time),
        SequenceModelElement('Sensors', service_children_sensors),
        SequenceModelElement('IPAddresses', service_children_user_ip_address)
    ])

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filter = AtomFilters.SubhandlerFilter(None)

    from aminer.analysis.TimestampCorrectionFilters import SimpleMonotonicTimestampAdjust
    simple_monotonic_timestamp_adjust = SimpleMonotonicTimestampAdjust(
        [atom_filter])
    analysis_context.register_component(
        simple_monotonic_timestamp_adjust,
        component_name="SimpleMonotonicTimestampAdjust")

    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    stream_printer_event_handler = StreamPrinterEventHandler(analysis_context)
    anomaly_event_handlers = [stream_printer_event_handler]

    # Now define the AtomizerFactory using the model. A simple line based one is usually sufficient.
    from aminer.input import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(
        parsing_model, [simple_monotonic_timestamp_adjust],
        anomaly_event_handlers)

    # Just report all unparsed atoms to the event handlers.
    from aminer.input import SimpleUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(
        anomaly_event_handlers)
    atom_filter.add_handler(simple_unparsed_atom_handler,
                            stop_when_handled_flag=True)
    analysis_context.register_component(simple_unparsed_atom_handler,
                                        component_name="UnparsedHandler")

    from aminer.analysis.TimestampsUnsortedDetector import TimestampsUnsortedDetector
    timestamps_unsorted_detector = TimestampsUnsortedDetector(
        analysis_context.aminer_config, anomaly_event_handlers)
    atom_filter.add_handler(timestamps_unsorted_detector)
    analysis_context.register_component(
        timestamps_unsorted_detector,
        component_name="TimestampsUnsortedDetector")

    from aminer.analysis import Rules
    from aminer.analysis import AllowlistViolationDetector
    # This rule list should trigger, when the line does not look like: User root (logged in, logged out)
    # or User 'username' (logged in, logged out) x minutes ago.
    allowlist_rules = [
        Rules.OrMatchRule([
            Rules.AndMatchRule([
                Rules.PathExistsMatchRule(
                    '/model/LoginDetails/PastTime/Time/Minutes'),
                Rules.NegationMatchRule(
                    Rules.ValueMatchRule('/model/LoginDetails/Username',
                                         b'root'))
            ]),
            Rules.AndMatchRule([
                Rules.NegationMatchRule(
                    Rules.PathExistsMatchRule(
                        '/model/LoginDetails/PastTime/Time/Minutes')),
                Rules.PathExistsMatchRule('/model/LoginDetails')
            ]),
            Rules.NegationMatchRule(
                Rules.PathExistsMatchRule('/model/LoginDetails'))
        ])
    ]

    allowlist_violation_detector = AllowlistViolationDetector(
        analysis_context.aminer_config, allowlist_rules,
        anomaly_event_handlers)
    analysis_context.register_component(allowlist_violation_detector,
                                        component_name="Allowlist")
    atom_filter.add_handler(allowlist_violation_detector)

    from aminer.analysis import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_detector,
                                        component_name="NewMatchPath")
    atom_filter.add_handler(new_match_path_detector)

    def tuple_transformation_function(match_value_list):
        """Only allow output of the EnhancedNewMatchPathValueComboDetector after every 10000th element."""
        extra_data = enhanced_new_match_path_value_combo_detector.known_values_dict.get(
            tuple(match_value_list))
        if extra_data is not None:
            mod = 10000
            if (extra_data[2] + 1) % mod == 0:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = False
            else:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = True
        return match_value_list

    from aminer.analysis.EnhancedNewMatchPathValueComboDetector import EnhancedNewMatchPathValueComboDetector
    enhanced_new_match_path_value_combo_detector = EnhancedNewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/UName', '/model/DailyCron/Job Number'],
        anomaly_event_handlers,
        auto_include_flag=True,
        tuple_transformation_function=tuple_transformation_function)
    analysis_context.register_component(
        enhanced_new_match_path_value_combo_detector,
        component_name="EnhancedNewValueCombo")
    atom_filter.add_handler(enhanced_new_match_path_value_combo_detector)

    from aminer.analysis.HistogramAnalysis import HistogramAnalysis, LinearNumericBinDefinition, ModuloTimeBinDefinition, \
        PathDependentHistogramAnalysis
    modulo_time_bin_definition = ModuloTimeBinDefinition(
        86400, 3600, 0, 1, 24, True)
    linear_numeric_bin_definition = LinearNumericBinDefinition(50, 5, 20, True)
    histogram_analysis = HistogramAnalysis(
        analysis_context.aminer_config,
        [('/model/RandomTime/Random', modulo_time_bin_definition),
         ('/model/Random', linear_numeric_bin_definition)], 10,
        anomaly_event_handlers)
    analysis_context.register_component(histogram_analysis,
                                        component_name="HistogramAnalysis")
    atom_filter.add_handler(histogram_analysis)

    path_dependent_histogram_analysis = PathDependentHistogramAnalysis(
        analysis_context.aminer_config, '/model/RandomTime',
        modulo_time_bin_definition, 10, anomaly_event_handlers)
    analysis_context.register_component(
        path_dependent_histogram_analysis,
        component_name="PathDependentHistogramAnalysis")
    atom_filter.add_handler(path_dependent_histogram_analysis)

    from aminer.analysis.MatchValueAverageChangeDetector import MatchValueAverageChangeDetector
    match_value_average_change_detector = MatchValueAverageChangeDetector(
        analysis_context.aminer_config, anomaly_event_handlers, None,
        ['/model/Random'], 100, 10)
    analysis_context.register_component(
        match_value_average_change_detector,
        component_name="MatchValueAverageChange")
    atom_filter.add_handler(match_value_average_change_detector)

    import sys
    from aminer.analysis.MatchValueStreamWriter import MatchValueStreamWriter
    match_value_stream_writer = MatchValueStreamWriter(sys.stdout, [
        '/model/Sensors/CPUTemp', '/model/Sensors/CPUWorkload',
        '/model/Sensors/DTM'
    ], b';', b'')
    analysis_context.register_component(
        match_value_stream_writer, component_name="MatchValueStreamWriter")
    atom_filter.add_handler(match_value_stream_writer)

    from aminer.analysis.NewMatchPathValueComboDetector import NewMatchPathValueComboDetector
    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/IPAddresses/Username', '/model/IPAddresses/IP'],
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(
        new_match_path_value_combo_detector,
        component_name="NewMatchPathValueCombo")
    atom_filter.add_handler(new_match_path_value_combo_detector)

    from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector
    new_match_path_value_detector = NewMatchPathValueDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/JobNumber', '/model/IPAddresses/Username'],
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_value_detector,
                                        component_name="NewMatchPathValue")
    atom_filter.add_handler(new_match_path_value_detector)

    from aminer.analysis.MissingMatchPathValueDetector import MissingMatchPathValueDetector
    missing_match_path_value_detector = MissingMatchPathValueDetector(
        analysis_context.aminer_config,
        '/model/DiskReport/Space',
        anomaly_event_handlers,
        auto_include_flag=True,
        default_interval=2,
        realert_interval=5)
    analysis_context.register_component(missing_match_path_value_detector,
                                        component_name="MissingMatch")
    atom_filter.add_handler(missing_match_path_value_detector)

    from aminer.analysis.TimeCorrelationDetector import TimeCorrelationDetector
    time_correlation_detector = TimeCorrelationDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        2,
        min_rule_attributes=1,
        max_rule_attributes=5,
        record_count_before_event=70000,
        output_log_line=True)
    analysis_context.register_component(
        time_correlation_detector, component_name="TimeCorrelationDetector")
    atom_filter.add_handler(time_correlation_detector)

    from aminer.analysis.TimeCorrelationViolationDetector import TimeCorrelationViolationDetector, CorrelationRule, EventClassSelector
    cron_job_announcement = CorrelationRule(
        'CronJobAnnouncement',
        5,
        6,
        max_artefacts_a_for_single_b=1,
        artefact_match_parameters=[('/model/CronAnnouncement/JobNumber',
                                    '/model/CronExecution/JobNumber')])
    a_class_selector = EventClassSelector('Announcement',
                                          [cron_job_announcement], None)
    b_class_selector = EventClassSelector('Execution', None,
                                          [cron_job_announcement])
    rules = [
        Rules.PathExistsMatchRule('/model/CronAnnouncement/Run',
                                  a_class_selector),
        Rules.PathExistsMatchRule('/model/CronExecution/Job', b_class_selector)
    ]

    time_correlation_violation_detector = TimeCorrelationViolationDetector(
        analysis_context.aminer_config, rules, anomaly_event_handlers)
    analysis_context.register_component(
        time_correlation_violation_detector,
        component_name="TimeCorrelationViolationDetector")
    atom_filter.add_handler(time_correlation_violation_detector)
def getModel():
  """This function defines how to parse a audispd message logged
via syslog after any standard logging preamble, e.g. from syslog."""

  class ExecArgumentDataModelElement(object):
    """This is a helper class for parsing the (encoded) exec argument
    strings found within audit logs."""
    def __init__(self, elementId):
      self.elementId = elementId

    def getChildElements(self):
      """Get the children of this element (none)."""
      return None

    def getMatchElement(self, path, matchContext):
      """Find the maximum number of bytes belonging to an exec
      argument.
      @return a match when at least two bytes were found including
      the delimiters."""
      data = matchContext.matchData
      matchLen = 0
      matchValue = b''
      if data[0] == ord(b'"'):
        matchLen = data.find(b'"', 1)
        if matchLen == -1:
          return None
        matchValue = data[1:matchLen]
        matchLen += 1
      elif data.startswith(b'(null)'):
        matchLen = 6
        matchValue = None
      else:
# Must be upper case hex encoded:
        nextValue = -1
        for dByte in data:
          if (dByte >= 0x30) and (dByte <= 0x39):
            dByte -= 0x30
          elif (dByte >= 0x41) and (dByte <= 0x46):
            dByte -= 0x37
          else:
            break
          if nextValue == -1:
            nextValue = (dByte<<4)
          else:
            matchValue += bytearray(((nextValue|dByte),))
            nextValue = -1
          matchLen += 1
        if nextValue != -1:
          return None

      matchData = data[:matchLen]
      matchContext.update(matchData)
      return MatchElement(
          "%s/%s" % (path, self.elementId), matchData, matchValue, None)

  pamStatusWordList = FixedWordlistDataModelElement(
      'status', [b'failed', b'success'])


  typeBranches = {}
  typeBranches['ADD_USER'] = SequenceModelElement('adduser', [
      FixedDataModelElement('s0', b' pid='),
      DecimalIntegerValueModelElement('pid'),
      FixedDataModelElement('s1', b' uid='),
      DecimalIntegerValueModelElement('uid'),
      FixedDataModelElement('s2', b' auid='),
      DecimalIntegerValueModelElement('auid'),
      FixedDataModelElement('s3', b' ses='),
      DecimalIntegerValueModelElement('ses'),
      FixedDataModelElement('s4', b' msg=\'op=adding user id='),
      DecimalIntegerValueModelElement('newuserid'),
      FixedDataModelElement('s5', b' exe="'),
      DelimitedDataModelElement('exec', b'"'),
      FixedDataModelElement('s6', b'" hostname='),
      DelimitedDataModelElement('clientname', b' '),
      FixedDataModelElement('s7', b' addr='),
      DelimitedDataModelElement('clientip', b' '),
      FixedDataModelElement('s8', b' terminal='),
      WhiteSpaceLimitedDataModelElement('terminal'),
      FixedDataModelElement('s9', b' res=success\'')
  ])

  typeBranches['BPRM_FCAPS'] = SequenceModelElement('bprmfcaps', [
      FixedDataModelElement('s0', b' fver=0 fp='),
      HexStringModelElement('fp'),
      FixedDataModelElement('s1', b' fi='),
      HexStringModelElement('fi'),
      FixedDataModelElement('s2', b' fe='),
      HexStringModelElement('fe'),
      FixedDataModelElement('s3', b' old_pp='),
      HexStringModelElement('pp-old'),
      FixedDataModelElement('s4', b' old_pi='),
      HexStringModelElement('pi-old'),
      FixedDataModelElement('s5', b' old_pe='),
      HexStringModelElement('pe-old'),
      FixedDataModelElement('s6', b' new_pp='),
      HexStringModelElement('pp-new'),
      FixedDataModelElement('s7', b' new_pi='),
      HexStringModelElement('pi-new'),
      FixedDataModelElement('s8', b' new_pe='),
      HexStringModelElement('pe-new')
  ])

  typeBranches['CONFIG_CHANGE'] = SequenceModelElement('conf-change', [
      FixedDataModelElement('s0', b' auid='),
      DecimalIntegerValueModelElement('auid'),
      FixedDataModelElement('s1', b' ses='),
      DecimalIntegerValueModelElement('ses'),
      FixedDataModelElement('s2', b' op="add rule" key=(null) list='),
      DecimalIntegerValueModelElement('list'),
      FixedDataModelElement('s3', b' res='),
      DecimalIntegerValueModelElement('result')
  ])

  typeBranches['CRED_ACQ'] = SequenceModelElement('credacq', [
      FixedDataModelElement('s0', b' pid='),
      DecimalIntegerValueModelElement('pid'),
      FixedDataModelElement('s1', b' uid='),
      DecimalIntegerValueModelElement('uid'),
      FixedDataModelElement('s2', b' auid='),
      DecimalIntegerValueModelElement('auid'),
      FixedDataModelElement('s3', b' ses='),
      DecimalIntegerValueModelElement('ses'),
      FixedDataModelElement('s4', b' msg=\'op=PAM:setcred acct="'),
      DelimitedDataModelElement('username', b'"'),
      FixedDataModelElement('s5', b'" exe="'),
      DelimitedDataModelElement('exec', b'"'),
      FixedDataModelElement('s6', b'" hostname='),
      DelimitedDataModelElement('clientname', b' '),
      FixedDataModelElement('s7', b' addr='),
      DelimitedDataModelElement('clientip', b' '),
      FixedDataModelElement('s8', b' terminal='),
      WhiteSpaceLimitedDataModelElement('terminal'),
      FixedDataModelElement('s9', b' res=success\'')
  ])

  typeBranches['CRED_DISP'] = SequenceModelElement('creddisp', [
      FixedDataModelElement('s0', b' pid='),
      DecimalIntegerValueModelElement('pid'),
      FixedDataModelElement('s1', b' uid='),
      DecimalIntegerValueModelElement('uid'),
      FixedDataModelElement('s2', b' auid='),
      DecimalIntegerValueModelElement('auid'),
      FixedDataModelElement('s3', b' ses='),
      DecimalIntegerValueModelElement('ses'),
      FixedDataModelElement('s4', b' msg=\'op=PAM:setcred acct="'),
      DelimitedDataModelElement('username', b'"'),
      FixedDataModelElement('s5', b'" exe="'),
      DelimitedDataModelElement('exec', b'"'),
      FixedDataModelElement('s6', b'" hostname='),
      DelimitedDataModelElement('clientname', b' '),
      FixedDataModelElement('s7', b' addr='),
      DelimitedDataModelElement('clientip', b' '),
      FixedDataModelElement('s8', b' terminal='),
      WhiteSpaceLimitedDataModelElement('terminal'),
      FixedDataModelElement('s9', b' res=success\'')
  ])

  typeBranches['CRED_REFR'] = SequenceModelElement('creddisp', [
      FixedDataModelElement('s0', b' pid='),
      DecimalIntegerValueModelElement('pid'),
      FixedDataModelElement('s1', b' uid='),
      DecimalIntegerValueModelElement('uid'),
      FixedDataModelElement('s2', b' auid='),
      DecimalIntegerValueModelElement('auid'),
      FixedDataModelElement('s3', b' ses='),
      DecimalIntegerValueModelElement('ses'),
      FixedDataModelElement('s4', b' msg=\'op=PAM:setcred acct="root" ' \
          b'exe="/usr/sbin/sshd" hostname='),
      IpAddressDataModelElement('clientname'),
      FixedDataModelElement('s5', b' addr='),
      IpAddressDataModelElement('clientip'),
      FixedDataModelElement('s6', b' terminal=ssh res=success\'')])

  typeBranches['CWD'] = SequenceModelElement('cwd', [
      FixedDataModelElement('s0', b'  cwd='),
      ExecArgumentDataModelElement('cwd')])

# We need a type branch here also, but there is no additional
# data in EOE records after Ubuntu Trusty any more.
  typeBranches['EOE'] = OptionalMatchModelElement(
      'eoe', FixedDataModelElement('s0', b''))

  execArgModel = SequenceModelElement('execarg', [
      FixedDataModelElement('s0', b' a'),
      DecimalIntegerValueModelElement('argn'),
      FixedDataModelElement('s1', b'='),
      ExecArgumentDataModelElement('argval')])

  typeBranches['EXECVE'] = SequenceModelElement('execve', [
      FixedDataModelElement('s0', b' argc='),
      DecimalIntegerValueModelElement('argc'),
      RepeatedElementDataModelElement('arg', execArgModel)])

  typeBranches['FD_PAIR'] = SequenceModelElement('fdpair', [
      FixedDataModelElement('s0', b' fd0='),
      DecimalIntegerValueModelElement('fd0'),
      FixedDataModelElement('s1', b' fd1='),
      DecimalIntegerValueModelElement('fd1')])

# This message differs on Ubuntu 32/64 bit variants.
  typeBranches['LOGIN'] = SequenceModelElement('login', [
      FixedDataModelElement('s0', b' pid='),
      DecimalIntegerValueModelElement('pid'),
      FixedDataModelElement('s1', b' uid='),
      DecimalIntegerValueModelElement('uid'),
      FixedWordlistDataModelElement('s2', [b' old auid=', b' old-auid=']),
      DecimalIntegerValueModelElement('auid-old'),
      FixedWordlistDataModelElement('s3', [b' new auid=', b' auid=']),
      DecimalIntegerValueModelElement('auid-new'),
      FixedWordlistDataModelElement('s4', [b' old ses=', b' old-ses=']),
      DecimalIntegerValueModelElement('ses-old'),
      FixedWordlistDataModelElement('s5', [b' new ses=', b' ses=']),
      DecimalIntegerValueModelElement('ses-new'),
      FixedDataModelElement('s6', b' res='),
      DecimalIntegerValueModelElement('result')])

  inodeInfoModelElement = SequenceModelElement('inodeinfo', [
      FixedDataModelElement('s0', b' inode='),
      DecimalIntegerValueModelElement('inode'),
      FixedDataModelElement('s1', b' dev='),
      # A special major/minor device element could be better here.
      VariableByteDataModelElement('dev', b'0123456789abcdef:'),
      FixedDataModelElement('s2', b' mode='),
      # FIXME: is octal
      DecimalIntegerValueModelElement('mode'),
      FixedDataModelElement('s3', b' ouid='),
      DecimalIntegerValueModelElement('ouid'),
      FixedDataModelElement('s4', b' ogid='),
      DecimalIntegerValueModelElement('ogid'),
      FixedDataModelElement('s5', b' rdev='),
      # A special major/minor device element could be better here (see above).
      VariableByteDataModelElement('rdev', b'0123456789abcdef:'),
      FixedDataModelElement('s6', b' nametype=')])

  typeBranches['NETFILTER_CFG'] = SequenceModelElement('conf-change', [
      FixedDataModelElement('s0', b' table='),
      FixedWordlistDataModelElement('table', [b'filter', b'mangle', b'nat']),
      FixedDataModelElement('s1', b' family='),
      DecimalIntegerValueModelElement('family'),
      FixedDataModelElement('s2', b' entries='),
      DecimalIntegerValueModelElement('entries')
  ])

  typeBranches['OBJ_PID'] = SequenceModelElement('objpid', [
      FixedDataModelElement('s0', b' opid='),
      DecimalIntegerValueModelElement('opid'),
      FixedDataModelElement('s1', b' oauid='),
      DecimalIntegerValueModelElement(
          'oauid',
          valueSignType=DecimalIntegerValueModelElement.SIGN_TYPE_OPTIONAL),
      FixedDataModelElement('s2', b' ouid='),
      DecimalIntegerValueModelElement('ouid'),
      FixedDataModelElement('s3', b' oses='),
      DecimalIntegerValueModelElement(
          'oses',
          valueSignType=DecimalIntegerValueModelElement.SIGN_TYPE_OPTIONAL),
      FixedDataModelElement('s4', b' ocomm='),
      ExecArgumentDataModelElement('ocomm'),
  ])

  typeBranches['PATH'] = SequenceModelElement('path', [
      FixedDataModelElement('s0', b' item='),
      DecimalIntegerValueModelElement('item'),
      FixedDataModelElement('s1', b' name='),
      ExecArgumentDataModelElement('name'),
      FirstMatchModelElement('fsinfo', [
          inodeInfoModelElement,
          FixedDataModelElement('noinfo', b' nametype=')]),
      FixedWordlistDataModelElement(
          'nametype', [b'CREATE', b'DELETE', b'NORMAL', b'PARENT', b'UNKNOWN']),
  ])

  typeBranches['PROCTITLE'] = SequenceModelElement('proctitle', [
      FixedDataModelElement('s1', b' proctitle='),
      ExecArgumentDataModelElement('proctitle')])

  typeBranches['SERVICE_START'] = SequenceModelElement('service', [
      FixedDataModelElement('s0', b' pid='),
      DecimalIntegerValueModelElement('pid'),
      FixedDataModelElement('s1', b' uid='),
      DecimalIntegerValueModelElement('uid'),
      FixedDataModelElement('s2', b' auid='),
      DecimalIntegerValueModelElement('auid'),
      FixedDataModelElement('s3', b' ses='),
      DecimalIntegerValueModelElement('ses'),
      FixedDataModelElement('s4', b' msg=\'unit='),
      DelimitedDataModelElement('unit', b' '),
      FixedDataModelElement('s5', b' comm="systemd" exe="'),
      DelimitedDataModelElement('exec', b'"'),
      FixedDataModelElement('s6', b'" hostname='),
      DelimitedDataModelElement('clientname', b' '),
      FixedDataModelElement('s7', b' addr='),
      DelimitedDataModelElement('clientip', b' '),
      FixedDataModelElement('s8', b' terminal='),
      WhiteSpaceLimitedDataModelElement('terminal'),
      FixedDataModelElement('s9', b' res='),
      pamStatusWordList,
      FixedDataModelElement('s10', b'\'')
  ])
  typeBranches['SERVICE_STOP'] = typeBranches['SERVICE_START']

  typeBranches['SOCKADDR'] = SequenceModelElement('sockaddr', [
      FixedDataModelElement('s0', b' saddr='),
      HexStringModelElement('sockaddr', upperCase=True),
  ])

  typeBranches['SYSCALL'] = SequenceModelElement('syscall', [
      FixedDataModelElement('s0', b' arch='),
      HexStringModelElement('arch'),
      FixedDataModelElement('s1', b' syscall='),
      DecimalIntegerValueModelElement('syscall'),
      OptionalMatchModelElement('personality', SequenceModelElement('pseq', [
          FixedDataModelElement('s0', b' per='),
          DecimalIntegerValueModelElement('personality'),
      ])),
      OptionalMatchModelElement('result', SequenceModelElement('rseq', [
          FixedDataModelElement('s2', b' success='),
          FixedWordlistDataModelElement('succes', [b'no', b'yes']),
          FixedDataModelElement('s3', b' exit='),
          DecimalIntegerValueModelElement(
              'exit',
              valueSignType=DecimalIntegerValueModelElement.SIGN_TYPE_OPTIONAL),
      ])),
      FixedDataModelElement('s4', b' a0='),
      HexStringModelElement('arg0'),
      FixedDataModelElement('s5', b' a1='),
      HexStringModelElement('arg1'),
      FixedDataModelElement('s6', b' a2='),
      HexStringModelElement('arg2'),
      FixedDataModelElement('s7', b' a3='),
      HexStringModelElement('arg3'),
      FixedDataModelElement('s8', b' items='),
      DecimalIntegerValueModelElement('items'),
      FixedDataModelElement('s9', b' ppid='),
      DecimalIntegerValueModelElement('ppid'),
      FixedDataModelElement('s10', b' pid='),
      DecimalIntegerValueModelElement('pid'),
      FixedDataModelElement('s11', b' auid='),
      DecimalIntegerValueModelElement('auid'),
      FixedDataModelElement('s12', b' uid='),
      DecimalIntegerValueModelElement('uid'),
      FixedDataModelElement('s13', b' gid='),
      DecimalIntegerValueModelElement('gid'),
      FixedDataModelElement('s14', b' euid='),
      DecimalIntegerValueModelElement('euid'),
      FixedDataModelElement('s15', b' suid='),
      DecimalIntegerValueModelElement('suid'),
      FixedDataModelElement('s16', b' fsuid='),
      DecimalIntegerValueModelElement('fsuid'),
      FixedDataModelElement('s17', b' egid='),
      DecimalIntegerValueModelElement('egid'),
      FixedDataModelElement('s18', b' sgid='),
      DecimalIntegerValueModelElement('sgid'),
      FixedDataModelElement('s19', b' fsgid='),
      DecimalIntegerValueModelElement('fsgid'),
      FixedDataModelElement('s20', b' tty='),
      DelimitedDataModelElement('tty', b' '),
      FixedDataModelElement('s21', b' ses='),
      DecimalIntegerValueModelElement('sesid'),
      FixedDataModelElement('s22', b' comm='),
      ExecArgumentDataModelElement('command'),
      FixedDataModelElement('s23', b' exe="'),
      DelimitedDataModelElement('executable', b'"'),
      FixedDataModelElement('s24', b'" key='),
      AnyByteDataModelElement('key')
  ])

# The UNKNOWN type is used then audispd does not know the type
# of the event, usually because the kernel is more recent than
# audispd, thus emiting yet unknown event types.
# * type=1327: procitle: see https://www.redhat.com/archives/linux-audit/2014-February/msg00047.html
  typeBranches['UNKNOWN[1327]'] = SequenceModelElement('unknown-proctitle', [
      FixedDataModelElement('s0', b' proctitle='),
      ExecArgumentDataModelElement('proctitle')
  ])

  typeBranches['USER_ACCT'] = SequenceModelElement('useracct', [
      FixedDataModelElement('s0', b' pid='),
      DecimalIntegerValueModelElement('pid'),
      FixedDataModelElement('s1', b' uid='),
      DecimalIntegerValueModelElement('uid'),
      FixedDataModelElement('s2', b' auid='),
      DecimalIntegerValueModelElement('auid'),
      FixedDataModelElement('s3', b' ses='),
      DecimalIntegerValueModelElement('ses'),
      FixedDataModelElement('s4', b' msg=\'op=PAM:accounting acct="'),
      DelimitedDataModelElement('username', b'"'),
      FixedDataModelElement('s5', b'" exe="'),
      DelimitedDataModelElement('exec', b'"'),
      FixedDataModelElement('s6', b'" hostname='),
      DelimitedDataModelElement('clientname', b' '),
      FixedDataModelElement('s7', b' addr='),
      DelimitedDataModelElement('clientip', b' '),
      FixedDataModelElement('s8', b' terminal='),
      WhiteSpaceLimitedDataModelElement('terminal'),
      FixedDataModelElement('s9', b' res=success\'')
  ])

  typeBranches['USER_AUTH'] = SequenceModelElement('userauth', [
      FixedDataModelElement('s0', b' pid='),
      DecimalIntegerValueModelElement('pid'),
      FixedDataModelElement('s1', b' uid='),
      DecimalIntegerValueModelElement('uid'),
      FixedDataModelElement('s2', b' auid='),
      DecimalIntegerValueModelElement('auid'),
      FixedDataModelElement('s3', b' ses='),
      DecimalIntegerValueModelElement('ses'),
      FixedDataModelElement('s4', b' msg=\'op=PAM:authentication acct="'),
      DelimitedDataModelElement('username', b'"'),
      FixedDataModelElement('s5', b'" exe="'),
      DelimitedDataModelElement('exec', b'"'),
      FixedDataModelElement('s6', b'" hostname='),
      DelimitedDataModelElement('clientname', b' '),
      FixedDataModelElement('s7', b' addr='),
      DelimitedDataModelElement('clientip', b' '),
      FixedDataModelElement('s8', b' terminal='),
      WhiteSpaceLimitedDataModelElement('terminal'),
      FixedDataModelElement('s9', b' res=success\'')
  ])

  typeBranches['USER_START'] = SequenceModelElement('userstart', [
      FixedDataModelElement('s0', b' pid='),
      DecimalIntegerValueModelElement('pid'),
      FixedDataModelElement('s1', b' uid='),
      DecimalIntegerValueModelElement('uid'),
      FixedDataModelElement('s2', b' auid='),
      DecimalIntegerValueModelElement('auid'),
      FixedDataModelElement('s3', b' ses='),
      DecimalIntegerValueModelElement('ses'),
      FixedDataModelElement('s4', b' msg=\'op=PAM:session_open acct="'),
      DelimitedDataModelElement('username', b'"'),
      FixedDataModelElement('s5', b'" exe="'),
      DelimitedDataModelElement('exec', b'"'),
      FixedDataModelElement('s6', b'" hostname='),
      DelimitedDataModelElement('clientname', b' '),
      FixedDataModelElement('s7', b' addr='),
      DelimitedDataModelElement('clientip', b' '),
      FixedDataModelElement('s8', b' terminal='),
      WhiteSpaceLimitedDataModelElement('terminal'),
      FixedDataModelElement('s9', b' res=success\'')
  ])

  typeBranches['USER_END'] = SequenceModelElement('userend', [
      FixedDataModelElement('s0', b' pid='),
      DecimalIntegerValueModelElement('pid'),
      FixedDataModelElement('s1', b' uid='),
      DecimalIntegerValueModelElement('uid'),
      FixedDataModelElement('s2', b' auid='),
      DecimalIntegerValueModelElement('auid'),
      FixedDataModelElement('s3', b' ses='),
      DecimalIntegerValueModelElement('ses'),
      FixedDataModelElement('s4', b' msg=\'op=PAM:session_close acct="'),
      DelimitedDataModelElement('username', b'"'),
      FixedDataModelElement('s5', b'" exe="'),
      DelimitedDataModelElement('exec', b'"'),
      FixedDataModelElement('s6', b'" hostname='),
      DelimitedDataModelElement('clientname', b' '),
      FixedDataModelElement('s7', b' addr='),
      DelimitedDataModelElement('clientip', b' '),
      FixedDataModelElement('s8', b' terminal='),
      WhiteSpaceLimitedDataModelElement('terminal'),
      FixedDataModelElement('s9', b' res=success\'')
  ])

  typeBranches['USER_ERR'] = SequenceModelElement('usererr', [
      FixedDataModelElement('s0', b' pid='),
      DecimalIntegerValueModelElement('pid'),
      FixedDataModelElement('s1', b' uid='),
      DecimalIntegerValueModelElement('uid'),
      FixedDataModelElement('s2', b' auid='),
      DecimalIntegerValueModelElement('auid'),
      FixedDataModelElement('s3', b' ses='),
      DecimalIntegerValueModelElement('ses'),
      FixedDataModelElement('s4', b' msg=\'op=PAM:bad_ident acct="?" exe="'),
      DelimitedDataModelElement('exec', b'"'),
      FixedDataModelElement('s5', b'" hostname='),
      DelimitedDataModelElement('clientname', b' '),
      FixedDataModelElement('s6', b' addr='),
      DelimitedDataModelElement('clientip', b' '),
      FixedDataModelElement('s7', b' terminal='),
      WhiteSpaceLimitedDataModelElement('terminal'),
      FixedDataModelElement('s8', b' res=failed\'')
  ])

  typeBranches['USER_LOGIN'] = SequenceModelElement('userlogin', [
      FixedDataModelElement('s0', b' pid='),
      DecimalIntegerValueModelElement('pid'),
      FixedDataModelElement('s1', b' uid='),
      DecimalIntegerValueModelElement('uid'),
      FixedDataModelElement('s2', b' auid='),
      DecimalIntegerValueModelElement('auid'),
      FixedDataModelElement('s3', b' ses='),
      DecimalIntegerValueModelElement('ses'),
      FixedDataModelElement('s4', b' msg=\'op=login '),
      FirstMatchModelElement('msgtype', [
          FixedDataModelElement('loginok', b'id=0'),
          SequenceModelElement('loginfail', [
              FixedDataModelElement('s0', b'acct='),
              ExecArgumentDataModelElement('account')
          ])]),
      FixedDataModelElement('s5', b' exe="'),
      DelimitedDataModelElement('exec', b'"'),
      FixedDataModelElement('s6', b'" hostname='),
      DelimitedDataModelElement('clientname', b' '),
      FixedDataModelElement('s7', b' addr='),
      DelimitedDataModelElement('clientip', b' '),
      FixedDataModelElement('s8', b' terminal='),
      WhiteSpaceLimitedDataModelElement('terminal'),
      FixedDataModelElement('s9', b' res='),
      pamStatusWordList,
      FixedDataModelElement('s10', b'\'')
  ])

  model = SequenceModelElement('audispd', [
      FixedDataModelElement('sname', b'audispd: '),
      FirstMatchModelElement('msg', [
          ElementValueBranchModelElement(
              'record', SequenceModelElement('preamble', [
                  FixedDataModelElement('s0', b'type='),
                  WhiteSpaceLimitedDataModelElement('type'),
                  FixedDataModelElement('s1', b' msg=audit('),
                  DecimalIntegerValueModelElement('time'),
                  FixedDataModelElement('s0', b'.'),
                  DecimalIntegerValueModelElement('ms'),
                  FixedDataModelElement('s1', b':'),
                  DecimalIntegerValueModelElement('seq'),
                  FixedDataModelElement('s2', b'):')
              ]),
              'type', typeBranches, defaultBranch=None),
          FixedDataModelElement('queue-full', b'queue is full - dropping event')
      ])
  ])
  return model
def get_model():
    """This function defines how to parse a su session information message after any standard logging preamble, e.g. from syslog."""

    type_children = [
        SequenceModelElement('queue', [
            FixedWordlistDataModelElement('type', [b'Start', b'End']),
            FixedDataModelElement('s0', b' queue run: pid='),
            DecimalIntegerValueModelElement('pid')
        ]),
        SequenceModelElement('rec-log', [
            WhiteSpaceLimitedDataModelElement('id'),
            FixedDataModelElement('s0', b' <= '),
            WhiteSpaceLimitedDataModelElement('env-from'),
            FirstMatchModelElement('source', [
                SequenceModelElement('network', [
                    FixedDataModelElement('s0', b' H=('),
                    DelimitedDataModelElement('hostname', b') '),
                    FixedDataModelElement('s1', b') ['),
                    IpAddressDataModelElement('hostip'),
                    FixedDataModelElement('s2', b']')
                ]),
                SequenceModelElement('user', [
                    FixedDataModelElement('s0', b' U='),
                    WhiteSpaceLimitedDataModelElement('user')
                ])
            ]),
            FixedDataModelElement('s2', b' P='),
            WhiteSpaceLimitedDataModelElement('proto'),
            FixedDataModelElement('s3', b' S='),
            DecimalIntegerValueModelElement('size'),
            OptionalMatchModelElement('idopt', SequenceModelElement('iddata', [
                FixedDataModelElement('s0', b' id='),
                AnyByteDataModelElement('id')
            ]))
        ]),
        SequenceModelElement('send-log', [
            WhiteSpaceLimitedDataModelElement('id'),
            # Strange: first address seems to use different separator than second one.
            FixedWordlistDataModelElement('s0', [b' => b', b' ->']),
            DelimitedDataModelElement('env-to', b' R='),
            FixedDataModelElement('s1', b' R='),
            WhiteSpaceLimitedDataModelElement('route'),
            FixedDataModelElement('s2', b' T='),
            WhiteSpaceLimitedDataModelElement('transport'),
            AnyByteDataModelElement('unparsed')
        ]),
        SequenceModelElement('sent', [
            WhiteSpaceLimitedDataModelElement('id'),
            FixedDataModelElement('s0', b' Completed')
        ]),
        SequenceModelElement('started', [
            FixedDataModelElement('s0', b' exim '),
            WhiteSpaceLimitedDataModelElement('version'),
            FixedDataModelElement('s1', b' daemon started: pid='),
            DecimalIntegerValueModelElement('pid'),
            FixedDataModelElement('s2', b', -q30m, listening for SMTP on [127.0.0.1]:25')
        ])
    ]

    model = SequenceModelElement('exim', [
        FixedDataModelElement('sname', b'exim['),
        DecimalIntegerValueModelElement('pid'),
        FixedDataModelElement('s0', b']: '),
        FirstMatchModelElement('msg', type_children)
    ])
    return model
    def run_new_match_id_value_combo_detector(self, min_allowed_time_diff):
        log_lines = [
            b'type=SYSCALL msg=audit(1580367384.000:1): arch=c000003e syscall=1 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367385.000:1): item=0 name="one" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 '
            b'rdev=00:00 nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367386.000:2): arch=c000003e syscall=2 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367387.000:2): item=0 name="two" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
            b'nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367388.000:3): arch=c000003e syscall=3 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367389.000:3): item=0 name="three" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00'
            b' nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367388.500:100): arch=c000003e syscall=1 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=SYSCALL msg=audit(1580367390.000:4): arch=c000003e syscall=1 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367391.000:4): item=0 name="one" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
            b'nametype=NORMAL',
            b'type=PATH msg=audit(1580367392.000:5): item=0 name="two" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
            b'nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367393.000:5): arch=c000003e syscall=2 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=SYSCALL msg=audit(1580367394.000:6): arch=c000003e syscall=4 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367395.000:7): item=0 name="five" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
            b'nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367396.000:8): arch=c000003e syscall=6 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367397.000:6): item=0 name="four" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
            b'nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367398.000:7): arch=c000003e syscall=5 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367399.000:8): item=0 name="six" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
            b'nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367400.000:9): arch=c000003e syscall=2 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367401.000:9): item=0 name="three" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 '
            b'rdev=00:00 nametype=NORMAL',
            b'type=PATH msg=audit(1580367402.000:10): item=0 name="one" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 '
            b'rdev=00:00 nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367403.000:10): arch=c000003e syscall=3 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 '
            b'a3=4f items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 '
            b'tty=(none) ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)'
        ]
        parsing_model = FirstMatchModelElement('type', [
            SequenceModelElement('path', [
                FixedDataModelElement('type', b'type=PATH '),
                FixedDataModelElement('msg_audit', b'msg=audit('),
                DelimitedDataModelElement('msg', b':'),
                FixedDataModelElement('placeholder', b':'),
                DecimalIntegerValueModelElement('id'),
                FixedDataModelElement('item_string', b'): item='),
                DecimalIntegerValueModelElement('item'),
                FixedDataModelElement('name_string', b' name="'),
                DelimitedDataModelElement('name', b'"'),
                FixedDataModelElement('inode_string', b'" inode='),
                DecimalIntegerValueModelElement('inode'),
                FixedDataModelElement('dev_string', b' dev='),
                DelimitedDataModelElement('dev', b' '),
                FixedDataModelElement('mode_string', b' mode='),
                DecimalIntegerValueModelElement('mode'),
                FixedDataModelElement('ouid_string', b' ouid='),
                DecimalIntegerValueModelElement('ouid'),
                FixedDataModelElement('ogid_string', b' ogid='),
                DecimalIntegerValueModelElement('ogid'),
                FixedDataModelElement('rdev_string', b' rdev='),
                DelimitedDataModelElement('rdev', b' '),
                FixedDataModelElement('nametype_string', b' nametype='),
                FixedWordlistDataModelElement('nametype',
                                              [b'NORMAL', b'ERROR'])
            ]),
            SequenceModelElement('syscall', [
                FixedDataModelElement('type', b'type=SYSCALL '),
                FixedDataModelElement('msg_audit', b'msg=audit('),
                DelimitedDataModelElement('msg', b':'),
                FixedDataModelElement('placeholder', b':'),
                DecimalIntegerValueModelElement('id'),
                FixedDataModelElement('arch_string', b'): arch='),
                DelimitedDataModelElement('arch', b' '),
                FixedDataModelElement('syscall_string', b' syscall='),
                DecimalIntegerValueModelElement('syscall'),
                FixedDataModelElement('success_string', b' success='),
                FixedWordlistDataModelElement('success', [b'yes', b'no']),
                FixedDataModelElement('exit_string', b' exit='),
                DecimalIntegerValueModelElement('exit'),
                AnyByteDataModelElement('remainding_data')
            ])
        ])

        results = [None] * self.iterations
        avg = 0
        z = 0
        while z < self.iterations:
            i = 0
            new_match_id_value_combo_detector = NewMatchIdValueComboDetector(
                self.aminer_config,
                ['parser/type/path/name', 'parser/type/syscall/syscall'],
                [self.stream_printer_event_handler],
                id_path_list=['parser/type/path/id', 'parser/type/syscall/id'],
                min_allowed_time_diff=min_allowed_time_diff,
                auto_include_flag=False,
                allow_missing_values_flag=True,
                persistence_id='audit_type_path',
                output_log_line=False)
            t = time.time()
            seconds = time.time()
            i = 0
            while int(time.time() - seconds) < self.waiting_time:
                p = process_time()
                r = random.randint(0, len(log_lines) - 1)
                seconds = seconds + process_time() - p

                # this code just creates some data to be able to compare with other analysis components.
                decimal_integer_value_me = DecimalIntegerValueModelElement(
                    'd', DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                    DecimalIntegerValueModelElement.PAD_TYPE_NONE)
                match_context = MatchContext(str(i % 100).encode())
                _match_element = decimal_integer_value_me.get_match_element(
                    'integer', match_context)
                ########################################################################################

                line = log_lines[r]
                log_atom = LogAtom(
                    line,
                    ParserMatch(
                        parsing_model.get_match_element(
                            'parser', MatchContext(line))), t,
                    self.__class__.__name__)
                new_match_id_value_combo_detector.receive_atom(log_atom)
                i = i + 1
            results[z] = i
            z = z + 1
            avg = avg + i
        avg = avg / self.iterations
        type(self).result = self.result + self.result_string % (
            new_match_id_value_combo_detector.__class__.__name__, avg, results,
            '%.2f seconds min_allowed_time_diff.' % min_allowed_time_diff)
Example #14
0
def build_analysis_pipeline(analysis_context):
    """
    Define the function to create pipeline for parsing the log data.
    It has also to define an AtomizerFactory to instruct AMiner how to process incoming data streams to create log atoms from them.
    """
    date_format_string = b'%Y-%m-%d %H:%M:%S'
    cron = b' cron['

    # Build the parsing model:
    from aminer.parsing import FirstMatchModelElement, SequenceModelElement, DecimalFloatValueModelElement, FixedDataModelElement, \
        DelimitedDataModelElement, AnyByteDataModelElement, FixedWordlistDataModelElement, DecimalIntegerValueModelElement, \
        DateTimeModelElement, IpAddressDataModelElement, Base64StringModelElement, ElementValueBranchModelElement, HexStringModelElement, \
        MultiLocaleDateTimeModelElement, OptionalMatchModelElement, RepeatedElementDataModelElement, VariableByteDataModelElement, \
        WhiteSpaceLimitedDataModelElement

    service_children_disk_report = [
        FixedDataModelElement(
            'Space',
            b' Current Disk Data is: Filesystem     Type  Size  Used Avail Use%'
        ),
        DelimitedDataModelElement('Data', b'%'),
        AnyByteDataModelElement('Rest')
    ]

    service_children_login_details = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedWordlistDataModelElement('Status',
                                      [b' logged in', b' logged out']),
        OptionalMatchModelElement(
            'PastTime',
            SequenceModelElement('Time', [
                FixedDataModelElement('Blank', b' '),
                DecimalIntegerValueModelElement('Minutes'),
                FixedDataModelElement('Ago', b' minutes ago.')
            ]))
    ]

    service_children_cron_job = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('UNameSpace1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('UNameSpace2', b' '),
        DelimitedDataModelElement('User', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Details', b']: Job `cron.daily` started.')
    ]

    service_children_random_time = [
        FixedDataModelElement('Space', b'Random: '),
        DecimalIntegerValueModelElement('Random')
    ]

    service_children_sensors = [
        SequenceModelElement('CPUTemp', [
            FixedDataModelElement('FixedTemp', b'CPU Temp: '),
            DecimalIntegerValueModelElement('Temp'),
            FixedDataModelElement('Degrees', b'\xc2\xb0C')
        ]),
        FixedDataModelElement('Space1', b', '),
        SequenceModelElement('CPUWorkload', [
            FixedDataModelElement('FixedWorkload', b'CPU Workload: '),
            DecimalIntegerValueModelElement('Workload'),
            FixedDataModelElement('Percent', b'%')
        ]),
        FixedDataModelElement('Space2', b', '),
        DateTimeModelElement('DTM', date_format_string)
    ]

    service_children_user_ip_address = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedDataModelElement('Action', b' changed IP address to '),
        IpAddressDataModelElement('IP')
    ]

    service_children_cron_job_announcement = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('Space', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Run', b']: Will run job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('StartTime', b'\' in 5 min.')
    ]

    service_children_cron_job_execution = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('Space1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Job', b']: Job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Started', b'\' started')
    ]

    service_children_audit = [
        SequenceModelElement('path', [
            FixedDataModelElement('type', b'type=PATH '),
            FixedDataModelElement('msg_audit', b'msg=audit('),
            DelimitedDataModelElement('msg', b':'),
            FixedDataModelElement('placeholder', b':'),
            DecimalIntegerValueModelElement('id'),
            FixedDataModelElement('item_string', b'): item='),
            DecimalIntegerValueModelElement('item'),
            FixedDataModelElement('name_string', b' name="'),
            DelimitedDataModelElement('name', b'"'),
            FixedDataModelElement('inode_string', b'" inode='),
            DecimalIntegerValueModelElement('inode'),
            FixedDataModelElement('dev_string', b' dev='),
            DelimitedDataModelElement('dev', b' '),
            FixedDataModelElement('mode_string', b' mode='),
            DecimalIntegerValueModelElement('mode'),
            FixedDataModelElement('ouid_string', b' ouid='),
            DecimalIntegerValueModelElement('ouid'),
            FixedDataModelElement('ogid_string', b' ogid='),
            DecimalIntegerValueModelElement('ogid'),
            FixedDataModelElement('rdev_string', b' rdev='),
            DelimitedDataModelElement('rdev', b' '),
            FixedDataModelElement('nametype_string', b' nametype='),
            FixedWordlistDataModelElement('nametype', [b'NORMAL', b'ERROR'])
        ]),
        SequenceModelElement('syscall', [
            FixedDataModelElement('type', b'type=SYSCALL '),
            FixedDataModelElement('msg_audit', b'msg=audit('),
            DelimitedDataModelElement('msg', b':'),
            FixedDataModelElement('placeholder', b':'),
            DecimalIntegerValueModelElement('id'),
            FixedDataModelElement('arch_string', b'): arch='),
            DelimitedDataModelElement('arch', b' '),
            FixedDataModelElement('syscall_string', b' syscall='),
            DecimalIntegerValueModelElement('syscall'),
            FixedDataModelElement('success_string', b' success='),
            FixedWordlistDataModelElement('success', [b'yes', b'no']),
            FixedDataModelElement('exit_string', b' exit='),
            DecimalIntegerValueModelElement('exit'),
            AnyByteDataModelElement('remainding_data')
        ])
    ]

    service_children_parsing_model_element = [
        DateTimeModelElement('DateTimeModelElement',
                             b'Current DateTime: %d.%m.%Y %H:%M:%S'),
        DecimalFloatValueModelElement('DecimalFloatValueModelElement',
                                      value_sign_type='optional'),
        DecimalIntegerValueModelElement('DecimalIntegerValueModelElement',
                                        value_sign_type='optional',
                                        value_pad_type='blank'),
        SequenceModelElement('', [
            DelimitedDataModelElement('DelimitedDataModelElement', b';'),
            FixedDataModelElement('FixedDataModelElement', b';')
        ])
    ]

    # ElementValueBranchModelElement
    fixed_data_me1 = FixedDataModelElement("fixed1", b'match ')
    fixed_data_me2 = FixedDataModelElement("fixed2", b'fixed String')
    fixed_wordlist_data_model_element = FixedWordlistDataModelElement(
        "wordlist", [b'data: ', b'string: '])
    decimal_integer_value_model_element = DecimalIntegerValueModelElement(
        "decimal")

    service_children_parsing_model_element.append(
        ElementValueBranchModelElement(
            'ElementValueBranchModelElement',
            FirstMatchModelElement("first", [
                SequenceModelElement(
                    "seq1",
                    [fixed_data_me1, fixed_wordlist_data_model_element]),
                SequenceModelElement("seq2", [
                    fixed_data_me1, fixed_wordlist_data_model_element,
                    fixed_data_me2
                ])
            ]), "wordlist", {
                0: decimal_integer_value_model_element,
                1: fixed_data_me2
            }))
    service_children_parsing_model_element.append(
        HexStringModelElement('HexStringModelElement'))
    service_children_parsing_model_element.append(
        SequenceModelElement('', [
            FixedDataModelElement('FixedDataModelElement',
                                  b'Gateway IP-Address: '),
            IpAddressDataModelElement('IpAddressDataModelElement')
        ]))
    import locale
    loc = locale.getlocale()
    if loc == (None, None):
        loc = ('en_US', 'utf8')
    service_children_parsing_model_element.append(
        MultiLocaleDateTimeModelElement('MultiLocaleDateTimeModelElement',
                                        [(b'%b %d %Y', '%s.%s' %
                                          (loc), None)]))
    service_children_parsing_model_element.append(
        RepeatedElementDataModelElement(
            'RepeatedElementDataModelElement',
            SequenceModelElement('SequenceModelElement', [
                FixedDataModelElement('FixedDataModelElement',
                                      b'drawn number: '),
                DecimalIntegerValueModelElement(
                    'DecimalIntegerValueModelElement')
            ]), 1))
    service_children_parsing_model_element.append(
        VariableByteDataModelElement('VariableByteDataModelElement', b'-@#'))
    service_children_parsing_model_element.append(
        SequenceModelElement('', [
            WhiteSpaceLimitedDataModelElement(
                'WhiteSpaceLimitedDataModelElement'),
            FixedDataModelElement('', b' ')
        ]))

    # The Base64StringModelElement must be just before the AnyByteDataModelElement to avoid unexpected Matches.
    service_children_parsing_model_element.append(
        Base64StringModelElement('Base64StringModelElement'))

    # The OptionalMatchModelElement must be paired with a FirstMatchModelElement because it accepts all data and thus no data gets to the
    # AnyByteDataModelElement. The AnyByteDataModelElement must be last, because all bytes are accepted.
    service_children_parsing_model_element.append(
        OptionalMatchModelElement(
            'OptionalMatchModelElement',
            FirstMatchModelElement('FirstMatchModelElement', [
                FixedDataModelElement('FixedDataModelElement',
                                      b'The-searched-element-was-found!'),
                SequenceModelElement('', [
                    FixedDataModelElement('FixedDME', b'Any:'),
                    AnyByteDataModelElement('AnyByteDataModelElement')
                ])
            ])))

    alphabet = b'abcdef'
    service_children_ecd = []
    for _, char in enumerate(alphabet):
        char = bytes([char])
        service_children_ecd.append(FixedDataModelElement(char.decode(), char))

    parsing_model = FirstMatchModelElement('model', [
        SequenceModelElement('CronAnnouncement',
                             service_children_cron_job_announcement),
        SequenceModelElement('CronExecution',
                             service_children_cron_job_execution),
        SequenceModelElement('DailyCron', service_children_cron_job),
        SequenceModelElement('DiskReport', service_children_disk_report),
        SequenceModelElement('LoginDetails', service_children_login_details),
        DecimalIntegerValueModelElement('Random'),
        SequenceModelElement('RandomTime', service_children_random_time),
        SequenceModelElement('Sensors', service_children_sensors),
        SequenceModelElement('IPAddresses', service_children_user_ip_address),
        FirstMatchModelElement('type', service_children_audit),
        FirstMatchModelElement('ECD', service_children_ecd),
        FirstMatchModelElement('ParsingME',
                               service_children_parsing_model_element)
    ])

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filter = AtomFilters.SubhandlerFilter(None)

    from aminer.analysis.TimestampCorrectionFilters import SimpleMonotonicTimestampAdjust
    simple_monotonic_timestamp_adjust = SimpleMonotonicTimestampAdjust(
        [atom_filter])
    analysis_context.register_component(
        simple_monotonic_timestamp_adjust,
        component_name="SimpleMonotonicTimestampAdjust")

    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    stream_printer_event_handler = StreamPrinterEventHandler(analysis_context)
    anomaly_event_handlers = [stream_printer_event_handler]

    # Now define the AtomizerFactory using the model. A simple line
    # based one is usually sufficient.
    from aminer.input import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(
        parsing_model, [simple_monotonic_timestamp_adjust],
        anomaly_event_handlers,
        default_timestamp_paths=["/model/DailyCron/DTM"])

    # Just report all unparsed atoms to the event handlers.
    from aminer.input import SimpleUnparsedAtomHandler, VerboseUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(
        anomaly_event_handlers)
    atom_filter.add_handler(simple_unparsed_atom_handler,
                            stop_when_handled_flag=False)
    analysis_context.register_component(simple_unparsed_atom_handler,
                                        component_name="SimpleUnparsedHandler")

    verbose_unparsed_atom_handler = VerboseUnparsedAtomHandler(
        anomaly_event_handlers, parsing_model)
    atom_filter.add_handler(verbose_unparsed_atom_handler,
                            stop_when_handled_flag=True)
    analysis_context.register_component(
        verbose_unparsed_atom_handler, component_name="VerboseUnparsedHandler")

    from aminer.analysis.TimestampsUnsortedDetector import TimestampsUnsortedDetector
    timestamps_unsorted_detector = TimestampsUnsortedDetector(
        analysis_context.aminer_config, anomaly_event_handlers)
    atom_filter.add_handler(timestamps_unsorted_detector)
    analysis_context.register_component(
        timestamps_unsorted_detector,
        component_name="TimestampsUnsortedDetector")

    from aminer.analysis import Rules
    from aminer.analysis import AllowlistViolationDetector

    # This rule list should trigger, when the line does not look like: User root (logged in, logged out)
    # or User 'username' (logged in, logged out) x minutes ago.
    allowlist_rules = [
        Rules.OrMatchRule([
            Rules.AndMatchRule([
                Rules.PathExistsMatchRule(
                    '/model/LoginDetails/PastTime/Time/Minutes'),
                Rules.NegationMatchRule(
                    Rules.ValueMatchRule('/model/LoginDetails/Username',
                                         b'root'))
            ]),
            Rules.AndMatchRule([
                Rules.NegationMatchRule(
                    Rules.PathExistsMatchRule(
                        '/model/LoginDetails/PastTime/Time/Minutes')),
                Rules.PathExistsMatchRule('/model/LoginDetails')
            ]),
            Rules.NegationMatchRule(
                Rules.PathExistsMatchRule('/model/LoginDetails'))
        ])
    ]

    allowlist_violation_detector = AllowlistViolationDetector(
        analysis_context.aminer_config,
        allowlist_rules,
        anomaly_event_handlers,
        output_log_line=True)
    analysis_context.register_component(allowlist_violation_detector,
                                        component_name="Allowlist")
    atom_filter.add_handler(allowlist_violation_detector)

    from aminer.analysis import ParserCount
    parser_count = ParserCount(analysis_context.aminer_config, None,
                               anomaly_event_handlers, 10)
    analysis_context.register_component(parser_count,
                                        component_name="ParserCount")
    atom_filter.add_handler(parser_count)

    from aminer.analysis.EventTypeDetector import EventTypeDetector
    etd = EventTypeDetector(analysis_context.aminer_config,
                            anomaly_event_handlers)
    analysis_context.register_component(etd,
                                        component_name="EventTypeDetector")
    atom_filter.add_handler(etd)

    from aminer.analysis.VariableTypeDetector import VariableTypeDetector
    vtd = VariableTypeDetector(analysis_context.aminer_config,
                               anomaly_event_handlers,
                               etd,
                               silence_output_except_indicator=False,
                               output_log_line=False)
    analysis_context.register_component(vtd,
                                        component_name="VariableTypeDetector")
    atom_filter.add_handler(vtd)

    from aminer.analysis.VariableCorrelationDetector import VariableCorrelationDetector
    vtd = VariableCorrelationDetector(analysis_context.aminer_config,
                                      anomaly_event_handlers,
                                      etd,
                                      disc_div_thres=0.5)
    analysis_context.register_component(
        vtd, component_name="VariableCorrelationDetector")
    atom_filter.add_handler(vtd)

    from aminer.analysis import EventCorrelationDetector
    ecd = EventCorrelationDetector(analysis_context.aminer_config,
                                   anomaly_event_handlers,
                                   check_rules_flag=True,
                                   hypothesis_max_delta_time=1.0,
                                   auto_include_flag=True)
    analysis_context.register_component(
        ecd, component_name="EventCorrelationDetector")
    atom_filter.add_handler(ecd)

    from aminer.analysis import MatchFilter
    match_filter = MatchFilter(analysis_context.aminer_config,
                               ['/model/Random'],
                               anomaly_event_handlers,
                               target_value_list=[1, 10, 100],
                               output_log_line=True)
    analysis_context.register_component(match_filter,
                                        component_name="MatchFilter")
    atom_filter.add_handler(match_filter)

    from aminer.analysis import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        auto_include_flag=True,
        output_log_line=True)
    analysis_context.register_component(new_match_path_detector,
                                        component_name="NewMatchPath")
    atom_filter.add_handler(new_match_path_detector)

    def tuple_transformation_function(match_value_list):
        """Only allow output of the EnhancedNewMatchPathValueComboDetector after every 10th element."""
        extra_data = enhanced_new_match_path_value_combo_detector.known_values_dict.get(
            tuple(match_value_list))
        if extra_data is not None:
            mod = 10
            if (extra_data[2] + 1) % mod == 0:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = False
            else:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = True
        return match_value_list

    from aminer.analysis.EnhancedNewMatchPathValueComboDetector import EnhancedNewMatchPathValueComboDetector
    enhanced_new_match_path_value_combo_detector = EnhancedNewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/UName', '/model/DailyCron/JobNumber'],
        anomaly_event_handlers,
        auto_include_flag=True,
        tuple_transformation_function=tuple_transformation_function,
        output_log_line=True)
    analysis_context.register_component(
        enhanced_new_match_path_value_combo_detector,
        component_name="EnhancedNewValueCombo")
    atom_filter.add_handler(enhanced_new_match_path_value_combo_detector)

    from aminer.analysis.HistogramAnalysis import HistogramAnalysis, LinearNumericBinDefinition, ModuloTimeBinDefinition, \
        PathDependentHistogramAnalysis
    modulo_time_bin_definition = ModuloTimeBinDefinition(
        86400, 3600, 0, 1, 24, True)
    linear_numeric_bin_definition = LinearNumericBinDefinition(50, 5, 20, True)
    histogram_analysis = HistogramAnalysis(
        analysis_context.aminer_config,
        [('/model/RandomTime/Random', modulo_time_bin_definition),
         ('/model/Random', linear_numeric_bin_definition)],
        10,
        anomaly_event_handlers,
        output_log_line=True)
    analysis_context.register_component(histogram_analysis,
                                        component_name="HistogramAnalysis")
    atom_filter.add_handler(histogram_analysis)

    path_dependent_histogram_analysis = PathDependentHistogramAnalysis(
        analysis_context.aminer_config,
        '/model/RandomTime',
        modulo_time_bin_definition,
        10,
        anomaly_event_handlers,
        output_log_line=True)
    analysis_context.register_component(
        path_dependent_histogram_analysis,
        component_name="PathDependentHistogramAnalysis")
    atom_filter.add_handler(path_dependent_histogram_analysis)

    from aminer.analysis.MatchValueAverageChangeDetector import MatchValueAverageChangeDetector
    match_value_average_change_detector = MatchValueAverageChangeDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        None, ['/model/Random'],
        100,
        10,
        output_log_line=True)
    analysis_context.register_component(
        match_value_average_change_detector,
        component_name="MatchValueAverageChange")
    atom_filter.add_handler(match_value_average_change_detector)

    import sys
    from aminer.analysis.MatchValueStreamWriter import MatchValueStreamWriter
    match_value_stream_writer = MatchValueStreamWriter(sys.stdout, [
        '/model/Sensors/CPUTemp', '/model/Sensors/CPUWorkload',
        '/model/Sensors/DTM'
    ], b';', b'')
    analysis_context.register_component(
        match_value_stream_writer, component_name="MatchValueStreamWriter")
    atom_filter.add_handler(match_value_stream_writer)

    from aminer.analysis.NewMatchPathValueComboDetector import NewMatchPathValueComboDetector
    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/IPAddresses/Username', '/model/IPAddresses/IP'],
        anomaly_event_handlers,
        output_log_line=True,
        auto_include_flag=True)
    analysis_context.register_component(
        new_match_path_value_combo_detector,
        component_name="NewMatchPathValueCombo")
    atom_filter.add_handler(new_match_path_value_combo_detector)

    from aminer.analysis.NewMatchIdValueComboDetector import NewMatchIdValueComboDetector
    new_match_id_value_combo_detector = NewMatchIdValueComboDetector(
        analysis_context.aminer_config,
        ['/model/type/path/name', '/model/type/syscall/syscall'],
        anomaly_event_handlers,
        id_path_list=['/model/type/path/id', '/model/type/syscall/id'],
        min_allowed_time_diff=5,
        auto_include_flag=True,
        allow_missing_values_flag=True,
        output_log_line=True)
    analysis_context.register_component(
        new_match_id_value_combo_detector,
        component_name="NewMatchIdValueComboDetector")
    atom_filter.add_handler(new_match_id_value_combo_detector)

    from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector
    new_match_path_value_detector = NewMatchPathValueDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/JobNumber', '/model/IPAddresses/Username'],
        anomaly_event_handlers,
        auto_include_flag=True,
        output_log_line=True)
    analysis_context.register_component(new_match_path_value_detector,
                                        component_name="NewMatchPathValue")
    atom_filter.add_handler(new_match_path_value_detector)

    from aminer.analysis.MissingMatchPathValueDetector import MissingMatchPathValueDetector
    missing_match_path_value_detector = MissingMatchPathValueDetector(
        analysis_context.aminer_config,
        '/model/DiskReport/Space',
        anomaly_event_handlers,
        auto_include_flag=True,
        default_interval=2,
        realert_interval=5,
        output_log_line=True)
    analysis_context.register_component(missing_match_path_value_detector,
                                        component_name="MissingMatch")
    atom_filter.add_handler(missing_match_path_value_detector)

    from aminer.analysis.TimeCorrelationDetector import TimeCorrelationDetector
    time_correlation_detector = TimeCorrelationDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        2,
        min_rule_attributes=1,
        max_rule_attributes=5,
        record_count_before_event=10000,
        output_log_line=True)
    analysis_context.register_component(
        time_correlation_detector, component_name="TimeCorrelationDetector")
    atom_filter.add_handler(time_correlation_detector)

    from aminer.analysis.TimeCorrelationViolationDetector import TimeCorrelationViolationDetector, CorrelationRule, EventClassSelector
    cron_job_announcement = CorrelationRule(
        'CronJobAnnouncement',
        5,
        6,
        max_artefacts_a_for_single_b=1,
        artefact_match_parameters=[('/model/CronAnnouncement/JobNumber',
                                    '/model/CronExecution/JobNumber')])
    a_class_selector = EventClassSelector('Announcement',
                                          [cron_job_announcement], None)
    b_class_selector = EventClassSelector('Execution', None,
                                          [cron_job_announcement])
    rules = [
        Rules.PathExistsMatchRule('/model/CronAnnouncement/Run',
                                  a_class_selector),
        Rules.PathExistsMatchRule('/model/CronExecution/Job', b_class_selector)
    ]

    time_correlation_violation_detector = TimeCorrelationViolationDetector(
        analysis_context.aminer_config,
        rules,
        anomaly_event_handlers,
        output_log_line=True)
    analysis_context.register_component(
        time_correlation_violation_detector,
        component_name="TimeCorrelationViolationDetector")
    atom_filter.add_handler(time_correlation_violation_detector)
Example #15
0
def build_analysis_pipeline(analysis_context):
    """Define the function to create pipeline for parsing the log data. It has also to define an AtomizerFactory to instruct AMiner
    how to process incoming data streams to create log atoms from them."""

    # Build the parsing model:
    from aminer.parsing import FirstMatchModelElement
    from aminer.parsing import SequenceModelElement

    from aminer.parsing.DateTimeModelElement import DateTimeModelElement
    import datetime
    from aminer.parsing import FixedDataModelElement
    from aminer.parsing.DelimitedDataModelElement import DelimitedDataModelElement
    from aminer.parsing import AnyByteDataModelElement

    service_children_disk_upgrade = [
        DateTimeModelElement(
            'DTM', b'%Y-%m-%d %H:%M:%S',
            datetime.datetime.now(datetime.timezone.utc).astimezone().tzinfo),
        FixedDataModelElement('UNameSpace1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('UNameSpace2', b' '),
        DelimitedDataModelElement('User', b' '),
        FixedDataModelElement('HDRepair',
                              b' System rebooted for hard disk upgrade')
    ]

    service_children_home_path = [
        FixedDataModelElement(
            'Pwd',
            b'The Path of the home directory shown by pwd of the user '),
        DelimitedDataModelElement('Username', b' '),
        FixedDataModelElement('Is', b' is: '),
        AnyByteDataModelElement('Path')
    ]

    parsing_model = FirstMatchModelElement('model', [
        SequenceModelElement('DiskUpgrade', service_children_disk_upgrade),
        SequenceModelElement('HomePath', service_children_home_path)
    ])

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filter = AtomFilters.SubhandlerFilter(None)

    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    stream_printer_event_handler = StreamPrinterEventHandler(analysis_context)
    from aminer.events.SyslogWriterEventHandler import SyslogWriterEventHandler
    syslog_writer_event_handler = SyslogWriterEventHandler(analysis_context)
    anomaly_event_handlers = [
        stream_printer_event_handler, syslog_writer_event_handler
    ]

    from aminer.input import SimpleMultisourceAtomSync
    simple_multisource_atom_sync = SimpleMultisourceAtomSync([atom_filter], 9)

    # Now define the AtomizerFactory using the model. A simple line
    # based one is usually sufficient.
    from aminer.input import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(
        parsing_model, [simple_multisource_atom_sync],
        anomaly_event_handlers,
        default_timestamp_paths=['model/DiskUpgrade/Date'])

    # Just report all unparsed atoms to the event handlers.
    from aminer.input import SimpleUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(
        anomaly_event_handlers)
    atom_filter.add_handler(simple_unparsed_atom_handler,
                            stop_when_handled_flag=True)
    analysis_context.register_component(simple_unparsed_atom_handler,
                                        component_name="UnparsedHandler")

    from aminer.analysis import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_detector,
                                        component_name="NewPath")
    atom_filter.add_handler(new_match_path_detector)

    from aminer.analysis import NewMatchPathValueComboDetector
    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/HomePath/Username', '/model/HomePath/Path'],
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_value_combo_detector,
                                        component_name="NewValueCombo")
    atom_filter.add_handler(new_match_path_value_combo_detector)

    # Include the e-mail notification handler only if the configuration parameter was set.
    from aminer.events import DefaultMailNotificationEventHandler
    if DefaultMailNotificationEventHandler.CONFIG_KEY_MAIL_TARGET_ADDRESS in analysis_context.aminer_config.config_properties:
        mail_notification_handler = DefaultMailNotificationEventHandler(
            analysis_context)
        analysis_context.register_component(mail_notification_handler,
                                            component_name="MailHandler")
        anomaly_event_handlers.append(mail_notification_handler)
def build_analysis_pipeline(analysis_context):
    """Define the function to create pipeline for parsing the log
    data. It has also to define an AtomizerFactory to instruct AMiner
    how to process incoming data streams to create log atoms from
    them."""

    date_format_string = b'%Y-%m-%d %H:%M:%S'
    cron = b' cron['

    # Build the parsing model:

    service_children_disk_report = [
        FixedDataModelElement(
            'Space',
            b' Current Disk Data is: Filesystem     Type  Size  Used Avail Use%'
        ),
        DelimitedDataModelElement('Data', b'%'),
        AnyByteDataModelElement('Rest')
    ]

    service_children_login_details = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedWordlistDataModelElement('Status',
                                      [b' logged in', b' logged out']),
        OptionalMatchModelElement(
            'PastTime',
            SequenceModelElement('Time', [
                FixedDataModelElement('Blank', b' '),
                DecimalIntegerValueModelElement('Minutes'),
                FixedDataModelElement('Ago', b' minutes ago.')
            ]))
    ]

    service_children_cron_job = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('UNameSpace1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('UNameSpace2', b' '),
        DelimitedDataModelElement('User', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Details', b']: Job `cron.daily` started.')
    ]

    service_children_random_time = [
        FixedDataModelElement('Space', b'Random: '),
        DecimalIntegerValueModelElement('Random')
    ]

    service_children_sensors = [
        SequenceModelElement('CPUTemp', [
            FixedDataModelElement('FixedTemp', b'CPU Temp: '),
            DecimalIntegerValueModelElement('Temp'),
            FixedDataModelElement('Degrees', b'\xc2\xb0C')
        ]),
        FixedDataModelElement('Space1', b', '),
        SequenceModelElement('CPUWorkload', [
            FixedDataModelElement('Fixed Workload', b'CPU Workload: '),
            DecimalIntegerValueModelElement('Workload'),
            FixedDataModelElement('Percent', b'%')
        ]),
        FixedDataModelElement('Space2', b', '),
        DateTimeModelElement('DTM', date_format_string)
    ]

    service_children_user_ip_address = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedDataModelElement('Action', b' changed IP address to '),
        IpAddressDataModelElement('IP')
    ]

    service_children_cron_job_announcement = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('Space', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Run', b']: Will run job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Start Time', b'\' in 5 min.')
    ]

    service_children_cron_job_execution = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('Space1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Job', b']: Job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Started', b'\' started')
    ]

    service_children_parsing_model_element = [
        DateTimeModelElement('DateTimeModelElement',
                             b'Current DateTime: %d.%m.%Y %H:%M:%S'),
        DecimalFloatValueModelElement('DecimalFloatValueModelElement',
                                      value_sign_type='optional'),
        DecimalIntegerValueModelElement('DecimalIntegerValueModelElement',
                                        value_sign_type='optional',
                                        value_pad_type='blank'),
        SequenceModelElement('', [
            DelimitedDataModelElement('DelimitedDataModelElement', b';'),
            FixedDataModelElement('FixedDataModelElement', b';')
        ])
    ]

    # ElementValueBranchModelElement
    fixed_data_me1 = FixedDataModelElement("fixed1", b'match ')
    fixed_data_me2 = FixedDataModelElement("fixed2", b'fixed String')
    fixed_wordlist_data_model_element = FixedWordlistDataModelElement(
        "wordlist", [b'data: ', b'string: '])
    decimal_integer_value_model_element = DecimalIntegerValueModelElement(
        "decimal")

    service_children_parsing_model_element.append(
        ElementValueBranchModelElement(
            'ElementValueBranchModelElement',
            FirstMatchModelElement("first", [
                SequenceModelElement(
                    "seq1",
                    [fixed_data_me1, fixed_wordlist_data_model_element]),
                SequenceModelElement("seq2", [
                    fixed_data_me1, fixed_wordlist_data_model_element,
                    fixed_data_me2
                ])
            ]), "wordlist", {
                0: decimal_integer_value_model_element,
                1: fixed_data_me2
            }))
    service_children_parsing_model_element.append(
        HexStringModelElement('HexStringModelElement'))
    service_children_parsing_model_element.append(
        SequenceModelElement('', [
            FixedDataModelElement('FixedDataModelElement',
                                  b'Gateway IP-Address: '),
            IpAddressDataModelElement('IpAddressDataModelElement')
        ]))
    service_children_parsing_model_element.append(
        MultiLocaleDateTimeModelElement('MultiLocaleDateTimeModelElement',
                                        [(b'%b %d %Y', "de_AT.utf8", None)]))
    service_children_parsing_model_element.append(
        RepeatedElementDataModelElement(
            'RepeatedElementDataModelElement',
            SequenceModelElement('SequenceModelElement', [
                FixedDataModelElement('FixedDataModelElement',
                                      b'drawn number: '),
                DecimalIntegerValueModelElement(
                    'DecimalIntegerValueModelElement')
            ]), 1))
    service_children_parsing_model_element.append(
        VariableByteDataModelElement('VariableByteDataModelElement', b'-@#'))
    service_children_parsing_model_element.append(
        SequenceModelElement('', [
            WhiteSpaceLimitedDataModelElement(
                'WhiteSpaceLimitedDataModelElement'),
            FixedDataModelElement('', b' ')
        ]))

    # The Base64StringModelElement must be just before the AnyByteDataModelElement to avoid unexpected Matches.
    service_children_parsing_model_element.append(
        Base64StringModelElement('Base64StringModelElement'))

    # The OptionalMatchModelElement must be paired with a FirstMatchModelElement because it accepts all data and thus no data gets
    # to the AnyByteDataModelElement. The AnyByteDataModelElement must be last, because all bytes are accepted.
    service_children_parsing_model_element.append(
        OptionalMatchModelElement(
            'OptionalMatchModelElement',
            FirstMatchModelElement('FirstMatchModelElement', [
                FixedDataModelElement('FixedDataModelElement',
                                      b'The-searched-element-was-found!'),
                AnyByteDataModelElement('AnyByteDataModelElement')
            ])))

    parsing_model = FirstMatchModelElement('model', [
        SequenceModelElement('CronAnnouncement',
                             service_children_cron_job_announcement),
        SequenceModelElement('CronExecution',
                             service_children_cron_job_execution),
        SequenceModelElement('DailyCron', service_children_cron_job),
        SequenceModelElement('DiskReport', service_children_disk_report),
        SequenceModelElement('LoginDetails', service_children_login_details),
        DecimalIntegerValueModelElement('Random'),
        SequenceModelElement('RandomTime', service_children_random_time),
        SequenceModelElement('Sensors', service_children_sensors),
        SequenceModelElement('IPAddresses', service_children_user_ip_address),
        FirstMatchModelElement('ParsingME',
                               service_children_parsing_model_element)
    ])

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers
    # later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filter = AtomFilters.SubhandlerFilter(None)

    from aminer.analysis.TimestampCorrectionFilters import SimpleMonotonicTimestampAdjust
    simple_monotonic_timestamp_adjust = SimpleMonotonicTimestampAdjust(
        [atom_filter])
    analysis_context.register_component(
        simple_monotonic_timestamp_adjust,
        component_name="SimpleMonotonicTimestampAdjust")

    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    stream_printer_event_handler = StreamPrinterEventHandler(analysis_context)
    from aminer.events.SyslogWriterEventHandler import SyslogWriterEventHandler
    syslog_event_handler = SyslogWriterEventHandler(analysis_context)
    from aminer.events import DefaultMailNotificationEventHandler
    if DefaultMailNotificationEventHandler.CONFIG_KEY_MAIL_TARGET_ADDRESS in analysis_context.aminer_config.config_properties:
        mail_notification_handler = DefaultMailNotificationEventHandler(
            analysis_context)
        analysis_context.register_component(mail_notification_handler,
                                            component_name="MailHandler")
    anomaly_event_handlers = [
        stream_printer_event_handler, syslog_event_handler,
        mail_notification_handler
    ]

    # Now define the AtomizerFactory using the model. A simple line based one is usually sufficient.
    from aminer.input import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(
        parsing_model, [simple_monotonic_timestamp_adjust],
        anomaly_event_handlers)

    # Just report all unparsed atoms to the event handlers.
    from aminer.input import SimpleUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(
        anomaly_event_handlers)
    atom_filter.add_handler(simple_unparsed_atom_handler,
                            stop_when_handled_flag=True)
    analysis_context.register_component(simple_unparsed_atom_handler,
                                        component_name="UnparsedHandler")

    from aminer.analysis.TimestampsUnsortedDetector import TimestampsUnsortedDetector
    timestamps_unsorted_detector = TimestampsUnsortedDetector(
        analysis_context.aminer_config, anomaly_event_handlers)
    atom_filter.add_handler(timestamps_unsorted_detector)
    analysis_context.register_component(
        timestamps_unsorted_detector,
        component_name="TimestampsUnsortedDetector")

    from aminer.analysis import Rules
    from aminer.analysis import WhitelistViolationDetector
    whitelist_rules = [
        Rules.OrMatchRule([
            Rules.AndMatchRule([
                Rules.PathExistsMatchRule(
                    '/model/LoginDetails/PastTime/Time/Minutes'),
                Rules.NegationMatchRule(
                    Rules.ValueMatchRule('/model/LoginDetails/Username',
                                         b'root'))
            ]),
            Rules.AndMatchRule([
                Rules.NegationMatchRule(
                    Rules.PathExistsMatchRule(
                        '/model/LoginDetails/PastTime/Time/Minutes')),
                Rules.PathExistsMatchRule('/model/LoginDetails')
            ]),
            Rules.NegationMatchRule(
                Rules.PathExistsMatchRule('/model/LoginDetails'))
        ])
    ]

    # This rule list should trigger, when the line does not look like: User root (logged in, logged out)
    # or User 'username' (logged in, logged out) x minutes ago.
    whitelist_violation_detector = WhitelistViolationDetector(
        analysis_context.aminer_config, whitelist_rules,
        anomaly_event_handlers)
    analysis_context.register_component(whitelist_violation_detector,
                                        component_name="Whitelist")
    atom_filter.add_handler(whitelist_violation_detector)

    from aminer.analysis import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_detector,
                                        component_name="NewMatchPath")
    atom_filter.add_handler(new_match_path_detector)

    def tuple_transformation_function(match_value_list):
        extra_data = enhanced_new_match_path_value_combo_detector.known_values_dict.get(
            tuple(match_value_list), None)
        if extra_data is not None:
            mod = 10000
            if (extra_data[2] + 1) % mod == 0:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = False
            else:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = True
        return match_value_list

    from aminer.analysis.EnhancedNewMatchPathValueComboDetector import EnhancedNewMatchPathValueComboDetector
    enhanced_new_match_path_value_combo_detector = EnhancedNewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/UName', '/model/DailyCron/JobNumber'],
        anomaly_event_handlers,
        auto_include_flag=True,
        tuple_transformation_function=tuple_transformation_function)
    analysis_context.register_component(
        enhanced_new_match_path_value_combo_detector,
        component_name="EnhancedNewValueCombo")
    atom_filter.add_handler(enhanced_new_match_path_value_combo_detector)

    from aminer.analysis.HistogramAnalysis import HistogramAnalysis, LinearNumericBinDefinition, ModuloTimeBinDefinition, \
        PathDependentHistogramAnalysis
    modulo_time_bin_definition = ModuloTimeBinDefinition(
        86400, 3600, 0, 1, 24, True)
    linear_numeric_bin_definition = LinearNumericBinDefinition(50, 5, 20, True)
    histogram_analysis = HistogramAnalysis(
        analysis_context.aminer_config,
        [('/model/RandomTime/Random', modulo_time_bin_definition),
         ('/model/Random', linear_numeric_bin_definition)], 10,
        anomaly_event_handlers)
    analysis_context.register_component(histogram_analysis,
                                        component_name="HistogramAnalysis")
    atom_filter.add_handler(histogram_analysis)

    path_dependent_histogram_analysis = PathDependentHistogramAnalysis(
        analysis_context.aminer_config, '/model/RandomTime',
        modulo_time_bin_definition, 10, anomaly_event_handlers)
    analysis_context.register_component(
        path_dependent_histogram_analysis,
        component_name="PathDependentHistogramAnalysis")
    atom_filter.add_handler(path_dependent_histogram_analysis)

    from aminer.analysis.MatchValueAverageChangeDetector import MatchValueAverageChangeDetector
    match_value_average_change_detector = MatchValueAverageChangeDetector(
        analysis_context.aminer_config, anomaly_event_handlers, None,
        ['/model/Random'], 100, 10)
    analysis_context.register_component(
        match_value_average_change_detector,
        component_name="MatchValueAverageChange")
    atom_filter.add_handler(match_value_average_change_detector)

    import sys
    from aminer.analysis.MatchValueStreamWriter import MatchValueStreamWriter
    match_value_stream_writer = MatchValueStreamWriter(sys.stdout, [
        '/model/Sensors/CPUTemp', '/model/Sensors/CPUWorkload',
        '/model/Sensors/DTM'
    ], b';', b'')
    analysis_context.register_component(
        match_value_stream_writer, component_name="MatchValueStreamWriter")
    atom_filter.add_handler(match_value_stream_writer)

    from aminer.analysis.NewMatchPathValueComboDetector import NewMatchPathValueComboDetector
    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/IPAddresses/Username', '/model/IPAddresses/IP'],
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(
        new_match_path_value_combo_detector,
        component_name="NewMatchPathValueCombo")
    atom_filter.add_handler(new_match_path_value_combo_detector)

    from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector
    new_match_path_value_detector = NewMatchPathValueDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/JobNumber', '/model/IPAddresses/Username'],
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_value_detector,
                                        component_name="NewMatchPathValue")
    atom_filter.add_handler(new_match_path_value_detector)

    from aminer.analysis.MissingMatchPathValueDetector import MissingMatchPathValueDetector
    missing_match_path_value_detector = MissingMatchPathValueDetector(
        analysis_context.aminer_config,
        '/model/DiskReport/Space',
        anomaly_event_handlers,
        auto_include_flag=True,
        default_interval=2,
        realert_interval=5)
    analysis_context.register_component(missing_match_path_value_detector,
                                        component_name="MissingMatch")
    atom_filter.add_handler(missing_match_path_value_detector)

    from aminer.analysis.TimeCorrelationDetector import TimeCorrelationDetector
    time_correlation_detector = TimeCorrelationDetector(
        analysis_context.aminer_config,
        2,
        1,
        0,
        anomaly_event_handlers,
        record_count_before_event=70000)
    analysis_context.register_component(
        time_correlation_detector, component_name="TimeCorrelationDetector")
    atom_filter.add_handler(time_correlation_detector)

    from aminer.analysis.TimeCorrelationViolationDetector import TimeCorrelationViolationDetector, CorrelationRule, EventClassSelector
    cron_job_announcement = CorrelationRule(
        'CronJobAnnouncement',
        5,
        6,
        max_artefacts_a_for_single_b=1,
        artefact_match_parameters=[('/model/CronAnnouncement/JobNumber',
                                    '/model/CronExecution/JobNumber')])
    a_class_selector = EventClassSelector('Announcement',
                                          [cron_job_announcement], None)
    b_class_selector = EventClassSelector('Execution', None,
                                          [cron_job_announcement])
    rules = [
        Rules.PathExistsMatchRule('/model/CronAnnouncement/Run',
                                  a_class_selector),
        Rules.PathExistsMatchRule('/model/CronExecution/Job', b_class_selector)
    ]

    time_correlation_violation_detector = TimeCorrelationViolationDetector(
        analysis_context.aminer_config, rules, anomaly_event_handlers)
    analysis_context.register_component(
        time_correlation_violation_detector,
        component_name="TimeCorrelationViolationDetector")
    atom_filter.add_handler(time_correlation_violation_detector)
Example #17
0
def get_model():
    """Return a model to parse Exim logs from the AIT-LDS."""
    alphabet = b'!"#$%&\'()*+,-./0123456789:;<>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\\^_`abcdefghijklmnopqrstuvwxyz{|}~=[]'

    model = SequenceModelElement('model', [
        DateTimeModelElement('time', b'%Y-%m-%d %H:%M:%S'),
        FixedDataModelElement('sp', b' '),
        FirstMatchModelElement('fm', [
            SequenceModelElement('start', [
                FixedDataModelElement('start', b'Start queue run: pid='),
                DecimalIntegerValueModelElement('pid'),
                ]),
            SequenceModelElement('end', [
                FixedDataModelElement('end', b'End queue run: pid='),
                DecimalIntegerValueModelElement('pid'),
                ]),
            SequenceModelElement('no_host_found', [
                FixedDataModelElement('no_host_found_str', b'no host name found for IP address '),
                IpAddressDataModelElement('ip'),
                ]),
            SequenceModelElement('vrfy_failed', [
                FixedDataModelElement('vrfy_failed_str', b'VRFY failed for '),
                DelimitedDataModelElement('mail', b' '),
                FixedDataModelElement('h_str', b' H='),
                DelimitedDataModelElement('h', b' '),
                FixedDataModelElement('sp1', b' ['),
                IpAddressDataModelElement('ip'),
                FixedDataModelElement('sp2', b']')
                ]),
            SequenceModelElement('mail', [
                DelimitedDataModelElement('id', b' '),
                FirstMatchModelElement('dir', [
                    SequenceModelElement('dir_in', [
                        FixedDataModelElement('in', b' <= '),
                        FirstMatchModelElement('fm', [
                            SequenceModelElement('seq1', [
                                FixedDataModelElement('brack', b'<> '),
                                FirstMatchModelElement('fm', [
                                    SequenceModelElement('r', [
                                        FixedDataModelElement('r_str', b'R='),
                                        DelimitedDataModelElement('r', b' '),
                                        FixedDataModelElement('u_str', b' U='),
                                        DelimitedDataModelElement('u', b' '),
                                        ]),
                                    SequenceModelElement('h', [
                                        FixedDataModelElement('h_str', b'H='),
                                        DelimitedDataModelElement('h', b' '),
                                        FixedDataModelElement('sp1', b' ['),
                                        IpAddressDataModelElement('ip'),
                                        FixedDataModelElement('sp1', b']'),
                                        ])
                                    ]),
                                FixedDataModelElement('sp2', b' P='),
                                DelimitedDataModelElement('p', b' '),
                                FixedDataModelElement('sp2', b' S='),
                                DecimalIntegerValueModelElement('s'),
                                ]),
                            SequenceModelElement('seq2', [
                                DelimitedDataModelElement('mail', b' '),
                                FixedDataModelElement('user_str', b' U='),
                                DelimitedDataModelElement('user', b' '),
                                FixedDataModelElement('p_str', b' P='),
                                DelimitedDataModelElement('p', b' '),
                                FixedDataModelElement('s_str', b' S='),
                                DecimalIntegerValueModelElement('s'),
                                OptionalMatchModelElement(
                                    'id', SequenceModelElement('id', [
                                        FixedDataModelElement('id_str', b' id='),
                                        AnyByteDataModelElement('id')
                                        ])
                                    )
                                ])
                            ])
                        ]),
                    SequenceModelElement('dir_out', [
                        FixedDataModelElement('in', b' => '),
                        DelimitedDataModelElement('name', b' '),
                        FixedDataModelElement('sp1', b' '),
                        OptionalMatchModelElement(
                            'mail_opt', SequenceModelElement('mail', [
                                FixedDataModelElement('brack1', b'('),
                                DelimitedDataModelElement('brack_mail', b')'),
                                FixedDataModelElement('brack2', b') '),
                                ])),
                        FixedDataModelElement('sp2', b'<'),
                        DelimitedDataModelElement('mail', b'>'),
                        FixedDataModelElement('r_str', b'> R='),
                        DelimitedDataModelElement('r', b' '),
                        FixedDataModelElement('t_str', b' T='),
                        VariableByteDataModelElement('t', alphabet),
                        ]),
                    SequenceModelElement('aster', [
                        FixedDataModelElement('aster', b' ** '),
                        DelimitedDataModelElement('command', b' '),
                        FixedDataModelElement('headers_str', b' Too many "Received" headers - suspected mail loop')]),
                    FixedDataModelElement('completed', b' Completed'),
                    FixedDataModelElement('frozen', b' Message is frozen'),
                    FixedDataModelElement('frozen', b' Frozen (delivery error message)')
                ])
                ])])])

    return model