Beispiel #1
0
def get_model():
    """Return a model to parse sSMTP log messages.

    The model expects ``sSMTP[<pid>]: `` followed by one of two message
    layouts, tried in order: the detailed ``Sent mail for ...`` summary line
    or the generic ``<program> sent mail for <user>`` line.
    """
    type_children = [
        SequenceModelElement("sent", [
            FixedDataModelElement("s0", b"Sent mail for "),
            DelimitedDataModelElement("to-addr", b" ("),
            FixedDataModelElement("s1", b" ("),
            DelimitedDataModelElement("status", b") uid="),
            FixedDataModelElement("s2", b") uid="),
            DecimalIntegerValueModelElement("uid"),
            # The user name runs up to the " outbytes=" marker.
            FixedDataModelElement("s3", b" username="),
            DelimitedDataModelElement("username", b" outbytes="),
            FixedDataModelElement("s4", b" outbytes="),
            DecimalIntegerValueModelElement("bytes")
        ]),
        # NOTE(review): this alternative reuses the id "sent" of the sequence
        # above - confirm whether both layouts are meant to share one path.
        SequenceModelElement("sent", [
            DelimitedDataModelElement("program", b" "),
            FixedDataModelElement("s0", b" sent mail for "),
            AnyByteDataModelElement("user")
        ])
    ]

    model = SequenceModelElement("ssmtp", [
        FixedDataModelElement("sname", b"sSMTP["),
        DecimalIntegerValueModelElement("pid"),
        FixedDataModelElement("s0", b"]: "),
        FirstMatchModelElement("msg", type_children)
    ])
    return model
def get_model():
    """Build the model used to parse Suricata Fast logs from the AIT-LDS.

    The line is described in three consecutive parts: the alert header
    (timestamp, the three numeric ids and the message), the classification and
    priority section, and finally the connection with both endpoints.
    """
    alert_header = [
        DateTimeModelElement("time", b"%m/%d/%Y-%H:%M:%S.%f"),
        FixedDataModelElement("brack_str1", b"  [**] ["),
        DecimalIntegerValueModelElement("id1"),
        FixedDataModelElement("sep1", b":"),
        DecimalIntegerValueModelElement("id2"),
        FixedDataModelElement("sep2", b":"),
        DecimalIntegerValueModelElement("id3"),
        FixedDataModelElement("sep3", b"] "),
        DelimitedDataModelElement("message", b" [**] "),
    ]
    classification_part = [
        FixedDataModelElement("classification_str", b" [**] [Classification: "),
        DelimitedDataModelElement("classification", b"]"),
        FixedDataModelElement("priority_str", b"] [Priority: "),
        DecimalIntegerValueModelElement("priority"),
    ]
    connection_part = [
        FixedDataModelElement("brack_str1", b"] {"),
        DelimitedDataModelElement("conn", b"}"),
        FixedDataModelElement("brack_str2", b"} "),
        IpAddressDataModelElement("src_ip"),
        FixedDataModelElement("colon", b":"),
        DecimalIntegerValueModelElement("src_port"),
        FixedDataModelElement("arrow_str", b" -> "),
        IpAddressDataModelElement("dst_ip"),
        FixedDataModelElement("colon", b":"),
        DecimalIntegerValueModelElement("dst_port"),
    ]
    return SequenceModelElement("model", alert_header + classification_part + connection_part)
Beispiel #3
0
 def test2delimeter_string(self):
     """Use a multi-byte string as delimiter and check the data in front of it is matched."""
     context = MatchContext(self.match_context_string)
     element = DelimitedDataModelElement('id', b' is')
     matched = element.get_match_element('match', context).get_match_string()
     self.assertEqual(matched, b'this')
Beispiel #4
0
def get_model():
    """Build the model used to parse Suricata Fast logs from the AIT-LDS."""
    # Timestamp, the three colon separated ids and the alert message.
    elements = [
        DateTimeModelElement('time', b'%m/%d/%Y-%H:%M:%S.%f'),
        FixedDataModelElement('brack_str1', b'  [**] ['),
        DecimalIntegerValueModelElement('id1'),
        FixedDataModelElement('sep1', b':'),
        DecimalIntegerValueModelElement('id2'),
        FixedDataModelElement('sep2', b':'),
        DecimalIntegerValueModelElement('id3'),
        FixedDataModelElement('sep3', b'] '),
        DelimitedDataModelElement('message', b' [**] '),
    ]
    # Classification text and numeric priority.
    elements += [
        FixedDataModelElement('classification_str', b' [**] [Classification: '),
        DelimitedDataModelElement('classification', b']'),
        FixedDataModelElement('priority_str', b'] [Priority: '),
        DecimalIntegerValueModelElement('priority'),
    ]
    # Connection descriptor in braces followed by "src:port -> dst:port".
    elements += [
        FixedDataModelElement('brack_str1', b'] {'),
        DelimitedDataModelElement('conn', b'}'),
        FixedDataModelElement('brack_str2', b'} '),
        IpAddressDataModelElement('src_ip'),
        FixedDataModelElement('colon', b':'),
        DecimalIntegerValueModelElement('src_port'),
        FixedDataModelElement('arrow_str', b' -> '),
        IpAddressDataModelElement('dst_ip'),
        FixedDataModelElement('colon', b':'),
        DecimalIntegerValueModelElement('dst_port'),
    ]
    return SequenceModelElement('model', elements)
Beispiel #5
0
 def test4get_match_element_single_char_no_match(self):
     """Use single characters that do not occur in the data as delimiter and expect no match."""
     data = b"this is a match context.\n"
     # None of these characters is contained in the data above.
     for delimiter in map(str.encode, "bdfgjklpqruvwyz"):
         element = DelimitedDataModelElement(self.id_, delimiter)
         context = DummyMatchContext(data)
         result = element.get_match_element(self.path, context)
         self.compare_no_match_results(data, result, context)
Beispiel #6
0
    def test7special_characters_escape(self):
        """Test special character escaping. The delimiter is not consumed (consume_delimiter=False)."""
        # Each case: (input data, expected match value, delimiter, escape sequence).
        cases = (
            (b'error: the command \\"python run.py\\" was not found" ',
             b'error: the command \\"python run.py\\" was not found', b'"', b"\\"),
            (rb"^This is a simple regex string. It costs 10\$.$",
             rb"^This is a simple regex string. It costs 10\$.", b"$", b"\\"),
            (b"the searched file is .gitignore.",
             b"the searched file is .gitignore", b".", b" "),
        )
        for data, value, delimiter, escape in cases:
            match_context = DummyMatchContext(data)
            element = DelimitedDataModelElement(self.id_, delimiter, escape)
            match_element = element.get_match_element(self.path, match_context)
            self.compare_match_results(data, match_element, match_context, self.id_, self.path, value, value, None)
Beispiel #7
0
    def test8special_characters_escape_no_match(self):
        """Test special character escaping where every delimiter occurrence is escaped, so nothing matches."""
        # Each case: (input data, delimiter, escape sequence).
        cases = (
            (b'error: the command \\"python run.py\\" was not found\\" ', b'"', b"\\"),
            (rb"^This is a simple regex string. It costs 10\$.\$", b"$", b"\\"),
            (b"the searched file is .gitignore .", b".", b" "),
        )
        for data, delimiter, escape in cases:
            match_context = DummyMatchContext(data)
            element = DelimitedDataModelElement(self.id_, delimiter, escape)
            match_element = element.get_match_element(self.path, match_context)
            self.compare_no_match_results(data, match_element, match_context)
Beispiel #8
0
    def test4special_characters_escape(self):
        """Test that escaped delimiter characters are skipped and the match ends at the unescaped one."""
        # Each case: (input data, delimiter, escape sequence, expected match string).
        cases = (
            (b'error: the command \\"python run.py\\" was not found" ', b'"', b'\\',
             b'error: the command \\"python run.py\\" was not found'),
            (rb'^This is a simple regex string. It costs 10\$.$', b'$', b'\\',
             rb'^This is a simple regex string. It costs 10\$.'),
            (b'the searched file is .gitignore.', b'.', b' ',
             b'the searched file is .gitignore'),
        )
        for data, delimiter, escape, expected in cases:
            context = MatchContext(data)
            element = DelimitedDataModelElement('id', delimiter, escape)
            result = element.get_match_element('match', context)
            self.assertEqual(result.get_match_string(), expected)
def get_systemd_model():
    """Build the parsing model for messages written by systemd itself.

    A line is expected to look like ``systemd[<pid>]: <message>``. The message
    alternatives are tried in the order they are listed below, so the specific
    fixed texts come before the generic "Starting"/"Started" patterns.
    """
    apt_daily = [
        FixedDataModelElement("apt-daily-start", b"Starting Daily apt upgrade and clean activities..."),
        FixedDataModelElement("apt-daily-started", b"Started Daily apt upgrade and clean activities."),
        FixedDataModelElement("apt-daily-finished", b"Finished Daily apt upgrade and clean activities."),
    ]
    service_succeeded = [
        SequenceModelElement("service-succeeded", [
            DelimitedDataModelElement("service", b" "),
            FixedDataModelElement("s0", b" Succeeded.")
        ]),
    ]
    finished_jobs = [
        FixedDataModelElement("clean-php", b"Finished Clean php session files."),
        FixedDataModelElement("finished-logrotate", b"Finished Rotate log files."),
        FixedDataModelElement("finished-man-db-daily", b"Finished Daily man-db regeneration."),
        FixedDataModelElement("finished-ubuntu-advantages", b"Finished Ubuntu Advantage APT and MOTD Messages."),
        FixedDataModelElement("finished-refresh", b"Finished Refresh fwupd metadata and update motd."),
        FixedDataModelElement("finished-daily-apt", b"Finished Daily apt download activities."),
    ]
    apt_timer = [
        SequenceModelElement("apt-daily-timer", [
            FixedDataModelElement("s0", b"apt-daily.timer: Adding "),
            # The hours part is only present for some delays.
            OptionalMatchModelElement("hopt", SequenceModelElement("hblock", [
                DecimalIntegerValueModelElement("hours"),
                FixedDataModelElement("s1", b"h ")
            ])),
            DecimalIntegerValueModelElement("minutes"),
            FixedDataModelElement("s2", b"min "),
            DecimalFloatValueModelElement("seconds"),
            FixedDataModelElement("s3", b"s random time.")
        ]),
    ]
    tmp_cleanup = [
        FixedDataModelElement("tmp-file-cleanup", b"Starting Cleanup of Temporary Directories..."),
        FixedDataModelElement("tmp-file-cleanup-started", b"Started Cleanup of Temporary Directories."),
    ]
    generic_messages = [
        SequenceModelElement("killing-process", [
            DelimitedDataModelElement("service", b":"),
            FixedDataModelElement("s0", b": Killing process "),
            DecimalIntegerValueModelElement("pid"),
            FixedDataModelElement("s1", b" (update-notifier) with signal SIGKILL.")
        ]),
        SequenceModelElement("starting", [
            FixedDataModelElement("s0", b"Starting "),
            DelimitedDataModelElement("service", b"."),
            FixedDataModelElement("s1", b"...")
        ]),
        SequenceModelElement("started", [
            FixedDataModelElement("s0", b"Started "),
            DelimitedDataModelElement("service", b".", consume_delimiter=True)
        ]),
        FixedDataModelElement("reloading", b"Reloading."),
    ]
    type_children = apt_daily + service_succeeded + finished_jobs + apt_timer + tmp_cleanup + generic_messages

    return SequenceModelElement("systemd", [
        FixedDataModelElement("sname", b"systemd["),
        DecimalIntegerValueModelElement("pid"),
        FixedDataModelElement("s0", b"]: "),
        FirstMatchModelElement("msg", type_children)
    ])
def get_model():
    """Return a model to parse messages from kernel logging.

    A line is expected to start with ``kernel: ``, optionally followed by a
    bracketed timestamp (``[<timestamp>] ``), and then the message itself. The
    message alternatives are tried in order; anything that is not recognized is
    captured by the trailing "unparsed" element.
    """
    type_children = [
        SequenceModelElement("ipv4-martian", [
            FixedDataModelElement("s0", b"IPv4: martian "),
            FixedWordlistDataModelElement("direction", [b"source", b"destination"]),
            FixedDataModelElement("s1", b" "),
            IpAddressDataModelElement("destination"),
            FixedDataModelElement("s2", b" from "),
            IpAddressDataModelElement("source"),
            FixedDataModelElement("s3", b", on dev "),
            AnyByteDataModelElement("interface")]),
        SequenceModelElement("net-llheader", [
            FixedDataModelElement("s0", b"ll header: "),
            AnyByteDataModelElement("data")
        ]),
        # Fallback: keep unknown kernel messages as raw bytes.
        AnyByteDataModelElement("unparsed")
    ]

    model = SequenceModelElement("kernel", [
        FixedDataModelElement("sname", b"kernel: "),
        OptionalMatchModelElement("opt", SequenceModelElement("seq", [
            # The timestamp is enclosed in square brackets, so the optional
            # block has to start with the opening bracket.
            FixedDataModelElement("opt_s0", b"["),
            DelimitedDataModelElement("timestamp", b"]"),
            FixedDataModelElement("opt_s1", b"] "),
        ])),
        FirstMatchModelElement("msg", type_children)
    ])
    return model
def get_model():
    """Build the model for kernel log messages that carry a bracketed timestamp.

    Expected layout: ``kernel: [<timestamp>] <message>``. Unknown messages are
    captured by the final "unparsed" alternative.
    """
    martian_packet = SequenceModelElement('ipv4-martian', [
        FixedDataModelElement('s0', b'IPv4: martian '),
        FixedWordlistDataModelElement('direction', [b'source', b'destination']),
        FixedDataModelElement('s1', b' '),
        IpAddressDataModelElement('destination'),
        FixedDataModelElement('s2', b' from '),
        IpAddressDataModelElement('source'),
        FixedDataModelElement('s3', b', on dev '),
        AnyByteDataModelElement('interface')
    ])
    link_layer_header = SequenceModelElement('net-llheader', [
        FixedDataModelElement('s0', b'll header: '),
        AnyByteDataModelElement('data')
    ])
    fallback = AnyByteDataModelElement('unparsed')

    return SequenceModelElement('kernel', [
        FixedDataModelElement('sname', b'kernel: ['),
        DelimitedDataModelElement('timestamp', b']'),
        FixedDataModelElement('s0', b'] '),
        FirstMatchModelElement('msg', [martian_packet, link_layer_header, fallback])
    ])
def get_model():
    """Build the model for rsyslogd messages following any standard logging preamble, e.g. from syslog.

    Covered messages: group id change, user id change and the state change
    line carrying the "origin" block with version and pid.
    """
    gid_change = SequenceModelElement('gidchange', [
        FixedDataModelElement('s0', b'rsyslogd\'s groupid changed to '),
        DecimalIntegerValueModelElement('gid')
    ])
    state_change = SequenceModelElement('statechange', [
        FixedDataModelElement('s0', b'[origin software="rsyslogd" swVersion="'),
        DelimitedDataModelElement('version', b'"'),
        FixedDataModelElement('s1', b'" x-pid="'),
        DecimalIntegerValueModelElement('pid'),
        FixedDataModelElement('s2', b'" x-info="http://www.rsyslog.com"] '),
        FirstMatchModelElement('type', [
            FixedDataModelElement('HUPed', b'rsyslogd was HUPed'),
            FixedDataModelElement('start', b'start')
        ])
    ])
    uid_change = SequenceModelElement('uidchange', [
        FixedDataModelElement('s0', b'rsyslogd\'s userid changed to '),
        DecimalIntegerValueModelElement('uid')
    ])

    return SequenceModelElement('rsyslog', [
        FixedDataModelElement('sname', b'rsyslogd: '),
        FirstMatchModelElement('msg', [gid_change, state_change, uid_change])
    ])
Beispiel #13
0
def get_model():
    """Build a parser for apache2 access.log lines.

    The trailing referer / user agent section is optional, all other parts are
    mandatory.
    """
    host_alphabet = b"-.01234567890abcdefghijklmnopqrstuvwxyz:"
    # The opening bracket of the timestamp is part of the date format itself.
    timestamp = DateTimeModelElement("time", b"[%d/%b/%Y:%H:%M:%S%z")
    remote_host = VariableByteDataModelElement("host", host_alphabet)
    ident = VariableByteDataModelElement("ident", host_alphabet)
    user = VariableByteDataModelElement("user", b"0123456789abcdefghijklmnopqrstuvwxyz.-")

    http_methods = [b"GET", b"POST", b"PUT", b"HEAD", b"DELETE", b"CONNECT", b"OPTIONS", b"TRACE", b"PATCH"]
    # The request is either a plain dash or "<method> <request> <version>".
    request = FirstMatchModelElement("fm", [
        FixedDataModelElement("dash", b"-"),
        SequenceModelElement("request", [
            FixedWordlistDataModelElement("method", http_methods),
            FixedDataModelElement("sp5", b" "),
            DelimitedDataModelElement("request", b" ", b"\\"),
            FixedDataModelElement("sp6", b" "),
            DelimitedDataModelElement("version", b'"'),
        ])
    ])
    status = DecimalIntegerValueModelElement("status")
    size = DecimalIntegerValueModelElement("size")

    blank = b" "
    return SequenceModelElement("accesslog", [
        remote_host,
        FixedDataModelElement("sp0", blank),
        ident,
        FixedDataModelElement("sp1", blank),
        user,
        FixedDataModelElement("sp2", blank),
        timestamp,
        FixedDataModelElement("sp3", b'] "'),
        request,
        FixedDataModelElement("sp6", b'" '),
        status,
        FixedDataModelElement("sp7", blank),
        size,
        OptionalMatchModelElement("combined", SequenceModelElement("combined", [
            FixedDataModelElement("sp9", b' "'),
            DelimitedDataModelElement("referer", b'"', b"\\"),
            FixedDataModelElement("sp10", b'" "'),
            DelimitedDataModelElement("user_agent", b'"', b"\\"),
            FixedDataModelElement("sp11", b'"')
        ]))
    ])
Beispiel #14
0
def get_model():
    """Build the model used to parse Apache Access logs from the AIT-LDS.

    The line is assembled from three consecutive parts: client and timestamp,
    the quoted request, and the response with the optional referer / user agent
    section.
    """
    alphabet = b"!'#$%&\"()*+,-./0123456789:;<>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\\^_`abcdefghijklmnopqrstuvwxyz{|}~=[]"

    client_part = [
        FirstMatchModelElement("client_ip", [
            IpAddressDataModelElement("client_ip"),
            FixedDataModelElement("localhost", b"::1")
        ]),
        FixedDataModelElement("sp1", b" "),
        VariableByteDataModelElement("client_id", alphabet),
        FixedDataModelElement("sp2", b" "),
        VariableByteDataModelElement("user_id", alphabet),
        FixedDataModelElement("sp3", b" ["),
        DateTimeModelElement("time", b"%d/%b/%Y:%H:%M:%S%z"),
        FixedDataModelElement("sp4", b'] "'),
    ]
    # The request is either a plain dash or "<method> <request> <version>".
    request_part = [
        FirstMatchModelElement("fm", [
            FixedDataModelElement("dash", b"-"),
            SequenceModelElement("request", [
                FixedWordlistDataModelElement("method", [
                    b"GET", b"POST", b"PUT", b"HEAD", b"DELETE", b"CONNECT", b"OPTIONS", b"TRACE", b"PATCH"]),
                FixedDataModelElement("sp5", b" "),
                DelimitedDataModelElement("request", b" ", b"\\"),
                FixedDataModelElement("sp6", b" "),
                DelimitedDataModelElement("version", b'"'),
            ])
        ]),
    ]
    response_part = [
        FixedDataModelElement("sp7", b'" '),
        DecimalIntegerValueModelElement("status_code"),
        FixedDataModelElement("sp8", b" "),
        DecimalIntegerValueModelElement("content_size"),
        OptionalMatchModelElement("combined", SequenceModelElement("combined", [
            FixedDataModelElement("sp9", b' "'),
            DelimitedDataModelElement("referer", b'"', b"\\"),
            FixedDataModelElement("sp10", b'" "'),
            DelimitedDataModelElement("user_agent", b'"', b"\\"),
            FixedDataModelElement("sp11", b'"'),
        ])),
    ]
    return SequenceModelElement("model", client_part + request_part + response_part)
Beispiel #15
0
def get_model():
    """Build the model used to parse Apache Error logs from the AIT-LDS.

    After the common prefix (day, timestamp, pid, client address) one of three
    PHP message kinds is expected: a warning, a notice or a deprecation message.
    """
    def source_location():
        """Create fresh elements for the "<path> on line <n>" tail."""
        return [
            DelimitedDataModelElement('path', b' '),
            FixedDataModelElement('compatible_str', b' on line '),
            DecimalIntegerValueModelElement('line'),
        ]

    def referer_tail():
        """Create fresh elements for the ", referer: <url>" tail."""
        return [
            FixedDataModelElement('referer_str', b', referer: '),
            AnyByteDataModelElement('referer'),
        ]

    prefix = [
        FixedDataModelElement('sp1', b'['),
        FixedWordlistDataModelElement('day', [b'Mon', b'Tue', b'Wed', b'Thu', b'Fri', b'Sat', b'Sun']),
        FixedDataModelElement('sp2', b' '),
        DateTimeModelElement('time', b'%b %d %H:%M:%S.%f %Y'),
        FixedDataModelElement('error_str', b'] [:error] [pid '),
        DecimalIntegerValueModelElement('pid'),
        FixedDataModelElement('sp3', b'] [client '),
        IpAddressDataModelElement('client_ip'),
        FixedDataModelElement('colon', b':'),
        DecimalIntegerValueModelElement('client_port'),
        FixedDataModelElement('php', b'] PHP '),
    ]

    warning = SequenceModelElement('warning', [
        FixedDataModelElement('warning_str', b'Warning:  '),
        FirstMatchModelElement('warning', [
            SequenceModelElement('declaration', [
                FixedDataModelElement('declaration_str', b'Declaration of '),
                DelimitedDataModelElement('function', b')'),
                FixedDataModelElement('compatible_str', b') should be compatible with '),
                DelimitedDataModelElement('function2', b')'),
                FixedDataModelElement('compatible_str', b') in '),
            ] + source_location() + referer_tail()),
            SequenceModelElement('system', [
                FixedDataModelElement('system_str', b'system(): Cannot execute a blank command in '),
            ] + source_location()),
        ]),
    ])
    notice = SequenceModelElement('notice', [
        FixedDataModelElement('notice_str', b'Notice:  Undefined index: '),
        DelimitedDataModelElement('command', b' '),
        FixedDataModelElement('sp', b' in '),
    ] + source_location())
    deprecated = SequenceModelElement('deprecated', [
        FixedDataModelElement('deprecated_str', b'Deprecated:  Methods with the same name as their class '
                              b'will not be constructors in a future version of PHP; '),
        DelimitedDataModelElement('class', b' '),
        FixedDataModelElement('constructor_str', b' has a deprecated constructor in '),
    ] + source_location() + referer_tail())

    return SequenceModelElement('model', prefix + [FirstMatchModelElement('fphp', [warning, notice, deprecated])])
Beispiel #16
0
def get_model():
    """Build the model used to parse Apache Access logs from the AIT-LDS.

    In this variant the timezone is not part of the date format; it is parsed
    as a separate decimal number after a literal " +".
    """
    alphabet = b'!"#$%&\'()*+,-./0123456789:;<>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\\^_`abcdefghijklmnopqrstuvwxyz{|}~=[]'

    client_part = [
        FirstMatchModelElement('client_ip', [
            IpAddressDataModelElement('client_ip'),
            FixedDataModelElement('localhost', b'::1')
        ]),
        FixedDataModelElement('sp1', b' '),
        VariableByteDataModelElement('client_id', alphabet),
        FixedDataModelElement('sp2', b' '),
        VariableByteDataModelElement('user_id', alphabet),
    ]
    time_part = [
        FixedDataModelElement('sp3', b' ['),
        DateTimeModelElement('time', b'%d/%b/%Y:%H:%M:%S'),
        FixedDataModelElement('sp4', b' +'),
        DecimalIntegerValueModelElement('tz'),
        FixedDataModelElement('sp5', b'] "'),
    ]
    # The request is either a plain dash or "<method> <request> <version>".
    request_part = [
        FirstMatchModelElement('fm', [
            FixedDataModelElement('dash', b'-'),
            SequenceModelElement('request', [
                FixedWordlistDataModelElement('method', [
                    b'GET', b'POST', b'PUT', b'HEAD', b'DELETE', b'CONNECT', b'OPTIONS', b'TRACE', b'PATCH']),
                FixedDataModelElement('sp6', b' '),
                DelimitedDataModelElement('request', b' ', b'\\'),
                FixedDataModelElement('sp7', b' '),
                DelimitedDataModelElement('version', b'"'),
            ])
        ]),
    ]
    response_part = [
        FixedDataModelElement('sp8', b'" '),
        DecimalIntegerValueModelElement('status_code'),
        FixedDataModelElement('sp9', b' '),
        DecimalIntegerValueModelElement('content_size'),
        OptionalMatchModelElement('combined', SequenceModelElement('combined', [
            FixedDataModelElement('sp10', b' "'),
            DelimitedDataModelElement('referer', b'"', b'\\'),
            FixedDataModelElement('sp11', b'" "'),
            DelimitedDataModelElement('user_agent', b'"', b'\\'),
            FixedDataModelElement('sp12', b'"'),
        ])),
    ]
    return SequenceModelElement('model', client_part + time_part + request_part + response_part)
Beispiel #17
0
def get_model():
    """Build the model for ulogd messages following any standard logging preamble, e.g. from syslog.

    Expected layout: ``ulogd[<pid>]: <message>``, where the message is either
    one of several fixed status texts, the plugin stack announcement or a
    "[DESTROY]" flow record with the original and the reply direction.
    """
    build_stack = SequenceModelElement('build-stack', [
        FixedDataModelElement('s0', b'building new pluginstance stack: \''),
        DelimitedDataModelElement('stack', b'\''),
        FixedDataModelElement('s1', b'\'')
    ])

    original_direction = [
        FixedDataModelElement('s0', b'[DESTROY] ORIG: SRC='),
        IpAddressDataModelElement('osrcip'),
        FixedDataModelElement('s1', b' DST='),
        IpAddressDataModelElement('odstip'),
        FixedDataModelElement('s2', b' PROTO='),
        FixedWordlistDataModelElement('proto', [b'TCP', b'UDP']),
        FixedDataModelElement('s3', b' SPT='),
        DecimalIntegerValueModelElement('ospt'),
        FixedDataModelElement('s4', b' DPT='),
        DecimalIntegerValueModelElement('odpt'),
        FixedDataModelElement('s5', b' PKTS='),
        DecimalIntegerValueModelElement('opkts'),
        FixedDataModelElement('s6', b' BYTES='),
        DecimalIntegerValueModelElement('obytes'),
    ]
    reply_direction = [
        FixedDataModelElement('s7', b' , REPLY: SRC='),
        IpAddressDataModelElement('rsrcip'),
        FixedDataModelElement('s8', b' DST='),
        IpAddressDataModelElement('rdstip'),
        FixedDataModelElement('s9', b' PROTO='),
        FixedWordlistDataModelElement('rproto', [b'TCP', b'UDP']),
        FixedDataModelElement('s10', b' SPT='),
        DecimalIntegerValueModelElement('rspt'),
        FixedDataModelElement('s11', b' DPT='),
        DecimalIntegerValueModelElement('rdpt'),
        FixedDataModelElement('s12', b' PKTS='),
        DecimalIntegerValueModelElement('rpkts'),
        FixedDataModelElement('s13', b' BYTES='),
        DecimalIntegerValueModelElement('rbytes'),
    ]
    # No additional whitespace from Ubuntu Trusty 14.04 on.
    trailing_blank = [OptionalMatchModelElement('tail', FixedDataModelElement('s0', b' '))]
    flow_event = SequenceModelElement('nfct-event', original_direction + reply_direction + trailing_blank)

    status_messages = [
        FixedDataModelElement('nfct-plugin', b'NFCT plugin working in event mode'),
        FixedDataModelElement('reopen', b'reopening capture file'),
        FixedDataModelElement('signal', b'signal received, calling pluginstances'),
        FixedDataModelElement('uidchange', b'Changing UID / GID'),
    ]

    # Netflow entry
    return SequenceModelElement('ulogd', [
        FixedDataModelElement('sname', b'ulogd['),
        DecimalIntegerValueModelElement('pid'),
        FixedDataModelElement('s0', b']: '),
        FirstMatchModelElement('msg', [build_stack, flow_event] + status_messages)
    ])
def get_model():
    """Build the model for ntpd messages of the form ``ntpd[<pid>]: <message>``."""
    # The same interface element instance is used by both "Listen ..." messages.
    iface = VariableByteDataModelElement('interface', b'0123456789abcdefghijklmnopqrstuvwxyz.')

    exit_msg = SequenceModelElement('exit', [
        FixedDataModelElement('s0', b'ntpd exiting on signal '),
        DecimalIntegerValueModelElement('signal')
    ])
    listen_drop = SequenceModelElement('listen-drop', [
        FixedDataModelElement('s0', b'Listen and drop on '),
        DecimalIntegerValueModelElement('fd'),
        FixedDataModelElement('s1', b' '),
        iface,
        FixedDataModelElement('s2', b' '),
        # Anything that is not an IPv4 address is taken up to the next blank.
        FirstMatchModelElement('address', [
            IpAddressDataModelElement('ipv4'),
            DelimitedDataModelElement('ipv6', b' ')
        ]),
        FixedDataModelElement('s3', b' UDP 123')
    ])
    listen_normal = SequenceModelElement('listen-normal', [
        FixedDataModelElement('s0', b'Listen normally on '),
        DecimalIntegerValueModelElement('fd'),
        FixedDataModelElement('s1', b' '),
        iface,
        FixedDataModelElement('s2', b' '),
        IpAddressDataModelElement('ip'),
        FirstMatchModelElement('msg', [
            FixedDataModelElement('port-new', b':123'),
            FixedDataModelElement('port-old', b' UDP 123')
        ])
    ])
    listen_routing = SequenceModelElement('listen-routing', [
        FixedDataModelElement('s0', b'Listening on routing socket on fd #'),
        DecimalIntegerValueModelElement('fd'),
        FixedDataModelElement('s1', b' for interface updates')
    ])
    fixed_messages = [
        FixedDataModelElement('new-interfaces', b'new interface(s) found: waking up resolver'),
        FixedDataModelElement('ntp-io', b'ntp_io: estimated max descriptors: 1024, initial socket boundary: 16'),
        FixedDataModelElement('peers-refreshed', b'peers refreshed'),
    ]
    precision = SequenceModelElement('precision', [
        FixedDataModelElement('s0', b'proto: precision = '),
        DecimalFloatValueModelElement('precision'),
        FixedDataModelElement('s1', b' usec')
    ])

    type_children = [exit_msg, listen_drop, listen_normal, listen_routing] + fixed_messages + [precision]
    return SequenceModelElement('ntpd', [
        FixedDataModelElement('sname', b'ntpd['),
        DecimalIntegerValueModelElement('pid'),
        FixedDataModelElement('s0', b']: '),
        FirstMatchModelElement('msg', type_children)
    ])
Beispiel #19
0
    def test14get_match_element_match_context_input_validation(self):
        """Check that get_match_element raises an exception for arguments that are not a MatchContext."""
        model_element = DelimitedDataModelElement(self.id_, self.delimiter)
        data = b"one, two, three"
        # Both context classes have to be accepted without an exception.
        for valid_context in (DummyMatchContext(data), MatchContext(data)):
            model_element.get_match_element(self.path, valid_context)

        invalid_arguments = (
            MatchElement(None, data, None, None),
            data,
            data.decode(),
            True,
            123,
            123.22,
            None,
            [],
            {"key": MatchContext(data)},
            set(),
            (),
            model_element,
        )
        for argument in invalid_arguments:
            self.assertRaises(AttributeError, model_element.get_match_element, self.path, argument)
Beispiel #20
0
def get_model(user_name_model=None):
    """Build the parsing model for cron messages that appear after a standard logging preamble such as the syslog header."""
    if user_name_model is None:
        user_name_model = VariableByteDataModelElement("user", b"0123456789abcdefghijklmnopqrstuvwxyz.-")

    # Job execution message: "(<user>) CMD <command>".
    exec_message = SequenceModelElement("exec", [
        FixedDataModelElement("s0", b"("),
        user_name_model,
        FixedDataModelElement("s1", b") CMD "),
        AnyByteDataModelElement("command"),
    ])

    # PAM session message: "pam_unix(cron:session): session opened|closed for user <user>[ by (uid=0)]".
    opened_by_root = OptionalMatchModelElement("openby", FixedDataModelElement("default", b" by (uid=0)"))
    pam_message = SequenceModelElement("pam", [
        FixedDataModelElement("s0", b"pam_unix(cron:session): session "),
        FixedWordlistDataModelElement("change", [b"opened", b"closed"]),
        FixedDataModelElement("s1", b" for user "),
        user_name_model,
        opened_by_root,
    ])

    # Lines of the form "CRON[<pid>]: <message>".
    standard_line = SequenceModelElement("std", [
        FixedDataModelElement("sname", b"CRON["),
        DecimalIntegerValueModelElement("pid"),
        FixedDataModelElement("s0", b"]: "),
        FirstMatchModelElement("msgtype", [exec_message, pam_message]),
    ])

    # Lines of the form "cron[<pid>]: (*system*<rname>) RELOAD (<fname>)".
    reload_line = SequenceModelElement("low", [
        FixedDataModelElement("sname", b"cron["),
        DecimalIntegerValueModelElement("pid"),
        FixedDataModelElement("s0", b"]: (*system*"),
        DelimitedDataModelElement("rname", b") RELOAD ("),
        FixedDataModelElement("s1", b") RELOAD ("),
        DelimitedDataModelElement("fname", b")"),
        FixedDataModelElement("s2", b")"),
    ])

    return FirstMatchModelElement("cron", [standard_line, reload_line])
def get_model():
    """
    Return the parsing model for sSMTP log messages.

    The model matches lines of the form
    "sSMTP[<pid>]: Sent mail for <to-addr> (<status>) uid=<uid> username=<username> outbytes=<bytes>".
    """
    type_children = [
        SequenceModelElement('sent', [
            FixedDataModelElement('s0', b'Sent mail for '),
            DelimitedDataModelElement('to-addr', b' ('),
            FixedDataModelElement('s1', b' ('),
            DelimitedDataModelElement('status', b') uid='),
            FixedDataModelElement('s2', b') uid='),
            DecimalIntegerValueModelElement('uid'),
            # The two elements below were fused into one invalid expression by a credential scrubber
            # ("username='******'"); they are restored following the fixed/delimited pattern used by
            # the neighbouring fields.
            FixedDataModelElement('s3', b' username='),
            DelimitedDataModelElement('username', b' outbytes='),
            FixedDataModelElement('s4', b' outbytes='),
            DecimalIntegerValueModelElement('bytes')
        ])
    ]

    # Every message starts with the program name and the process id in brackets.
    model = SequenceModelElement('ssmtp', [
        FixedDataModelElement('sname', b'sSMTP['),
        DecimalIntegerValueModelElement('pid'),
        FixedDataModelElement('s0', b']: '),
        FirstMatchModelElement('msg', type_children)
    ])
    return model
Beispiel #22
0
    def test3delimeter_none_empty_or_not_printable(self):
        """Exercise delimiter values that are not allowed: an empty delimiter, None and a non-printable byte."""
        # Empty delimiter: no match element is expected.
        context = MatchContext(self.match_context_string)
        element = DelimitedDataModelElement('id', b'')
        self.assertEqual(element.get_match_element('match', context), None)

        # None as delimiter: matching is expected to raise a TypeError.
        context = MatchContext(self.match_context_string)
        element = DelimitedDataModelElement('id', None)
        self.assertRaises(TypeError, element.get_match_element, 'match', context)

        # Non-printable delimiter byte: no match element is expected.
        context = MatchContext(self.match_context_string)
        element = DelimitedDataModelElement('id', b'\x01')
        self.assertEqual(element.get_match_element('match', context), None)
def get_tmp_files_model():
    """Build the parsing model for systemd tmpfiles daemon messages that follow a standard logging preamble (e.g. syslog)."""
    # Message reporting a duplicate path entry in /usr/lib/tmpfiles.d/var.conf, line 14.
    duplicate_line = SequenceModelElement('duplicate', [
        FixedDataModelElement('s0', b'[/usr/lib/tmpfiles.d/var.conf:14] Duplicate line for path "'),
        DelimitedDataModelElement('path', b'", ignoring.'),
        FixedDataModelElement('s2', b'", ignoring.'),
    ])

    # Every message starts with "systemd-tmpfiles[<pid>]: ".
    return SequenceModelElement('systemd-tmpfiles', [
        FixedDataModelElement('sname', b'systemd-tmpfiles['),
        DecimalIntegerValueModelElement('pid'),
        FixedDataModelElement('s0', b']: '),
        FirstMatchModelElement('msg', [duplicate_line]),
    ])
Beispiel #24
0
    def test1delimeter_single_char(self):
        """Check matching with a one-byte delimiter, once with a delimiter the test expects to be found and once with one it does not."""
        # Delimiter b'c': everything before it is expected as the match string.
        context = MatchContext(self.match_context_string)
        element = DelimitedDataModelElement('id', b'c')
        found = element.get_match_element('match', context)
        self.assertEqual(found.get_match_string(), b'this is a mat')

        # Delimiter b'f': no match element is expected.
        context = MatchContext(self.match_context_string)
        element = DelimitedDataModelElement('id', b'f')
        self.assertEqual(element.get_match_element('match', context), None)
def get_model(user_name_model=None):
    """Build the parsing model for su session messages that appear after a standard logging preamble such as the syslog header."""
    allowed_name_bytes = b"0123456789abcdefghijklmnopqrstuvwxyz.-"
    if user_name_model is None:
        user_name_model = VariableByteDataModelElement("user", allowed_name_bytes)
    srcuser_name_model = VariableByteDataModelElement("srcuser", allowed_name_bytes)

    # Message: "Successful su for <user> by <srcuser>".
    success_message = SequenceModelElement("su-good", [
        FixedDataModelElement("s0", b"Successful su for "),
        user_name_model,
        FixedDataModelElement("s1", b" by "),
        srcuser_name_model,
    ])

    # Message: "+ <terminal> <srcuser>:<user>"; it shares the element id "su-good" with the message above.
    terminal_message = SequenceModelElement("su-good", [
        FixedDataModelElement("s0", b"+ "),
        DelimitedDataModelElement("terminal", b" "),
        FixedDataModelElement("s1", b" "),
        srcuser_name_model,
        FixedDataModelElement("s2", b":"),
        user_name_model,
    ])

    # Message: "pam_unix(su:session): session opened|closed for user <user>[ by (uid=<uid>)]".
    opened_by = OptionalMatchModelElement("openby", SequenceModelElement("userinfo", [
        FixedDataModelElement("s0", b" by (uid="),
        DecimalIntegerValueModelElement("uid"),
        FixedDataModelElement("s1", b")"),
    ]))
    pam_message = SequenceModelElement("pam", [
        FixedDataModelElement("s0", b"pam_unix(su:session): session "),
        FixedWordlistDataModelElement("change", [b"opened", b"closed"]),
        FixedDataModelElement("s1", b" for user "),
        user_name_model,
        opened_by,
    ])

    # Every message starts with "su[<pid>]: ".
    return SequenceModelElement("su", [
        FixedDataModelElement("sname", b"su["),
        DecimalIntegerValueModelElement("pid"),
        FixedDataModelElement("s0", b"]: "),
        FirstMatchModelElement("msg", [success_message, terminal_message, pam_message]),
    ])
def get_model(user_name_model=None):
    """Create the model used to parse su session messages found after a standard logging preamble, e.g. the one written by syslog."""
    if user_name_model is None:
        user_name_model = VariableByteDataModelElement("user", b"0123456789abcdefghijklmnopqrstuvwxyz.-")
    srcuser_name_model = VariableByteDataModelElement("srcuser", b"0123456789abcdefghijklmnopqrstuvwxyz.-")

    # Optional trailer of the PAM message: " by (uid=<uid>)".
    uid_info = SequenceModelElement("userinfo", [
        FixedDataModelElement("s0", b" by (uid="),
        DecimalIntegerValueModelElement("uid"),
        FixedDataModelElement("s1", b")"),
    ])

    message_variants = []
    # "Successful su for <user> by <srcuser>"
    message_variants.append(SequenceModelElement("su-good", [
        FixedDataModelElement("s0", b"Successful su for "),
        user_name_model,
        FixedDataModelElement("s1", b" by "),
        srcuser_name_model,
    ]))
    # "+ <terminal> <srcuser>:<user>" (uses the same element id "su-good" as the previous variant)
    message_variants.append(SequenceModelElement("su-good", [
        FixedDataModelElement("s0", b"+ "),
        DelimitedDataModelElement("terminal", b" "),
        FixedDataModelElement("s1", b" "),
        srcuser_name_model,
        FixedDataModelElement("s2", b":"),
        user_name_model,
    ]))
    # "pam_unix(su:session): session opened|closed for user <user>" plus the optional uid trailer
    message_variants.append(SequenceModelElement("pam", [
        FixedDataModelElement("s0", b"pam_unix(su:session): session "),
        FixedWordlistDataModelElement("change", [b"opened", b"closed"]),
        FixedDataModelElement("s1", b" for user "),
        user_name_model,
        OptionalMatchModelElement("openby", uid_info),
    ]))

    # All variants are preceded by "su[<pid>]: ".
    su_model = SequenceModelElement("su", [
        FixedDataModelElement("sname", b"su["),
        DecimalIntegerValueModelElement("pid"),
        FixedDataModelElement("s0", b"]: "),
        FirstMatchModelElement("msg", message_variants),
    ])
    return su_model
Beispiel #27
0
def build_analysis_pipeline(analysis_context):
    """
    Build the parsing model and the analysis pipeline and attach both to the given analysis context.

    The function assembles the parsing model, creates the event handlers, assigns analysis_context.atomizer_factory and then creates
    one analysis component after the other. Every component is registered with analysis_context.register_component and, except for
    the timestamp adjuster and the event handlers, added as a handler to the central atom filter.
    @param analysis_context the context object that receives the atomizer factory and all registered components.
    The function has no explicit return value.
    """
    # Build the parsing model:

    # One child list per log line type; each list is wrapped into a SequenceModelElement further below.
    service_children_disk_report = [
        FixedDataModelElement(
            'Space',
            b' Current Disk Data is: Filesystem     Type  Size  Used Avail Use%'
        ),
        DelimitedDataModelElement('Data', b'%'),
        AnyByteDataModelElement('Rest')
    ]

    service_children_login_details = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedWordlistDataModelElement('Status',
                                      [b' logged in', b' logged out']),
        OptionalMatchModelElement(
            'PastTime',
            SequenceModelElement('Time', [
                FixedDataModelElement('Blank', b' '),
                DecimalIntegerValueModelElement('Minutes'),
                FixedDataModelElement('Ago', b' minutes ago.')
            ]))
    ]

    service_children_cron_job = [
        DateTimeModelElement('DTM', b'%Y-%m-%d %H:%M:%S'),
        FixedDataModelElement('UNameSpace1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('UNameSpace2', b' '),
        DelimitedDataModelElement('User', b' '),
        FixedDataModelElement('Cron', b' cron['),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Details', b']: Job `cron.daily` started.')
    ]

    service_children_random_time = [
        FixedDataModelElement('Space', b'Random: '),
        DecimalIntegerValueModelElement('Random')
    ]

    service_children_sensors = [
        SequenceModelElement('CPUTemp', [
            FixedDataModelElement('FixedTemp', b'CPU Temp: '),
            DecimalIntegerValueModelElement('Temp'),
            FixedDataModelElement('Degrees', b'\xc2\xb0C')
        ]),
        FixedDataModelElement('Space1', b', '),
        SequenceModelElement('CPUWorkload', [
            FixedDataModelElement('FixedWorkload', b'CPUWorkload: '),
            DecimalIntegerValueModelElement('Workload'),
            FixedDataModelElement('Percent', b'%')
        ]),
        FixedDataModelElement('Space2', b', '),
        DateTimeModelElement('DTM', b'%Y-%m-%d %H:%M:%S')
    ]

    service_children_user_ip_address = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedDataModelElement('Action', b' changed IP address to '),
        IpAddressDataModelElement('IP')
    ]

    # Note that the announcement and execution lines open the job name with a backtick and close it with a single quote.
    service_children_cron_job_announcement = [
        DateTimeModelElement('DTM', b'%Y-%m-%d %H:%M:%S'),
        FixedDataModelElement('Space', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', b' cron['),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Run', b']: Will run job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('StartTime', b'\' in 5 min.')
    ]

    service_children_cron_job_execution = [
        DateTimeModelElement('DTM', b'%Y-%m-%d %H:%M:%S'),
        FixedDataModelElement('Space1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', b' cron['),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Job', b']: Job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Started', b'\' started')
    ]

    # Combine the per-line-type child lists into the top-level model; the ids used here form the '/model/...' paths used below.
    parsing_model = FirstMatchModelElement('model', [
        SequenceModelElement('CronAnnouncement',
                             service_children_cron_job_announcement),
        SequenceModelElement('CronExecution',
                             service_children_cron_job_execution),
        SequenceModelElement('DailyCron', service_children_cron_job),
        SequenceModelElement('DiskReport', service_children_disk_report),
        SequenceModelElement('LoginDetails', service_children_login_details),
        DecimalIntegerValueModelElement('Random'),
        SequenceModelElement('RandomTime', service_children_random_time),
        SequenceModelElement('Sensors', service_children_sensors),
        SequenceModelElement('IPAddresses', service_children_user_ip_address)
    ])

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filters = AtomFilters.SubhandlerFilter(None)
    analysis_context.register_component(atom_filters,
                                        component_name="AtomFilter")

    # The timestamp adjuster is placed in front of the atom filter; it is the handler passed to the atomizer factory below.
    from aminer.analysis.TimestampCorrectionFilters import SimpleMonotonicTimestampAdjust
    simple_monotonic_timestamp_adjust = SimpleMonotonicTimestampAdjust(
        [atom_filters])
    analysis_context.register_component(
        simple_monotonic_timestamp_adjust,
        component_name="SimpleMonotonicTimestampAdjust")

    # Event handlers shared by all detectors created below. The mail handler is appended to this same list at the very end.
    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    stream_printer_event_handler = StreamPrinterEventHandler(
        analysis_context)  # skipcq: BAN-B108
    from aminer.events.Utils import VolatileLogarithmicBackoffEventHistory
    volatile_logarithmic_backoff_event_history = VolatileLogarithmicBackoffEventHistory(
        100)
    anomaly_event_handlers = [
        stream_printer_event_handler,
        volatile_logarithmic_backoff_event_history
    ]
    analysis_context.register_component(
        volatile_logarithmic_backoff_event_history,
        component_name="VolatileLogarithmicBackoffEventHistory")

    # Now define the AtomizerFactory using the model. A simple line based one is usually sufficient.
    from aminer.input.SimpleByteStreamLineAtomizerFactory import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(
        parsing_model, [simple_monotonic_timestamp_adjust],
        anomaly_event_handlers)

    # Just report all unparsed atoms to the event handlers.
    from aminer.analysis.UnparsedAtomHandlers import SimpleUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(
        anomaly_event_handlers)
    atom_filters.add_handler(simple_unparsed_atom_handler,
                             stop_when_handled_flag=True)
    analysis_context.register_component(simple_unparsed_atom_handler,
                                        component_name="UnparsedHandler")

    from aminer.analysis.TimestampsUnsortedDetector import TimestampsUnsortedDetector
    timestamps_unsorted_detector = TimestampsUnsortedDetector(
        analysis_context.aminer_config, anomaly_event_handlers)
    atom_filters.add_handler(timestamps_unsorted_detector)
    analysis_context.register_component(
        timestamps_unsorted_detector,
        component_name="TimestampsUnsortedDetector")

    # Allowlist rule over login lines: three alternatives combined with OrMatchRule, see the explanation below the rule list.
    from aminer.analysis import Rules
    from aminer.analysis.AllowlistViolationDetector import AllowlistViolationDetector
    allowlist_rules = [
        Rules.OrMatchRule([
            Rules.AndMatchRule([
                Rules.PathExistsMatchRule(
                    '/model/LoginDetails/PastTime/Time/Minutes'),
                Rules.NegationMatchRule(
                    Rules.ValueMatchRule('/model/LoginDetails/Username',
                                         b'root'))
            ]),
            Rules.AndMatchRule([
                Rules.NegationMatchRule(
                    Rules.PathExistsMatchRule(
                        '/model/LoginDetails/PastTime/Time/Minutes')),
                Rules.PathExistsMatchRule('/model/LoginDetails')
            ]),
            Rules.NegationMatchRule(
                Rules.PathExistsMatchRule('/model/LoginDetails'))
        ])
    ]

    # This rule list should trigger, when the line does not look like: User root (logged in, logged out)
    # or User 'username' (logged in, logged out) x minutes ago.
    allowlist_violation_detector = AllowlistViolationDetector(
        analysis_context.aminer_config, allowlist_rules,
        anomaly_event_handlers)
    analysis_context.register_component(allowlist_violation_detector,
                                        component_name="Allowlist")
    atom_filters.add_handler(allowlist_violation_detector)

    from aminer.analysis.ParserCount import ParserCount
    parser_count = ParserCount(analysis_context.aminer_config, None,
                               anomaly_event_handlers, 10)
    analysis_context.register_component(parser_count,
                                        component_name="ParserCount")
    atom_filters.add_handler(parser_count)

    from aminer.analysis.EventCorrelationDetector import EventCorrelationDetector
    ecd = EventCorrelationDetector(analysis_context.aminer_config,
                                   anomaly_event_handlers,
                                   check_rules_flag=True,
                                   hypothesis_max_delta_time=1.0,
                                   auto_include_flag=True)
    analysis_context.register_component(
        ecd, component_name="EventCorrelationDetector")
    atom_filters.add_handler(ecd)

    from aminer.analysis.NewMatchPathDetector import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_detector,
                                        component_name="NewMatchPath")
    atom_filters.add_handler(new_match_path_detector)

    # The callback refers to enhanced_new_match_path_value_combo_detector, which is only assigned after this definition;
    # the name is looked up when the callback is invoked, not when it is defined.
    def tuple_transformation_function(match_value_list):
        """Only allow output of the EnhancedNewMatchPathValueComboDetector after every 10000th element."""
        extra_data = enhanced_new_match_path_value_combo_detector.known_values_dict.get(
            tuple(match_value_list))
        if extra_data is not None:
            mod = 10000
            # NOTE(review): extra_data[2] is presumably the occurrence counter of this value combination - confirm.
            if (extra_data[2] + 1) % mod == 0:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = False
            else:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = True
        # The value list itself is passed through unchanged.
        return match_value_list

    from aminer.analysis.EnhancedNewMatchPathValueComboDetector import EnhancedNewMatchPathValueComboDetector
    enhanced_new_match_path_value_combo_detector = EnhancedNewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/UName', '/model/DailyCron/JobNumber'],
        anomaly_event_handlers,
        auto_include_flag=False,
        tuple_transformation_function=tuple_transformation_function)
    analysis_context.register_component(
        enhanced_new_match_path_value_combo_detector,
        component_name="EnhancedNewValueCombo")
    atom_filters.add_handler(enhanced_new_match_path_value_combo_detector)

    # Two bin definitions: the modulo time bins are used by both histogram components, the linear bins only by HistogramAnalysis.
    from aminer.analysis.HistogramAnalysis import HistogramAnalysis, LinearNumericBinDefinition, ModuloTimeBinDefinition, \
        PathDependentHistogramAnalysis
    modulo_time_bin_definition = ModuloTimeBinDefinition(
        86400, 3600, 0, 1, 24, True)
    linear_numeric_bin_definition = LinearNumericBinDefinition(50, 5, 20, True)
    histogram_analysis = HistogramAnalysis(
        analysis_context.aminer_config,
        [('/model/RandomTime/Random', modulo_time_bin_definition),
         ('/model/Random', linear_numeric_bin_definition)], 10,
        anomaly_event_handlers)
    analysis_context.register_component(histogram_analysis,
                                        component_name="HistogramAnalysis")
    atom_filters.add_handler(histogram_analysis)

    path_dependent_histogram_analysis = PathDependentHistogramAnalysis(
        analysis_context.aminer_config, '/model/RandomTime',
        modulo_time_bin_definition, 10, anomaly_event_handlers)
    analysis_context.register_component(
        path_dependent_histogram_analysis,
        component_name="PathDependentHistogramAnalysis")
    atom_filters.add_handler(path_dependent_histogram_analysis)

    from aminer.analysis.MatchValueAverageChangeDetector import MatchValueAverageChangeDetector
    match_value_average_change_detector = MatchValueAverageChangeDetector(
        analysis_context.aminer_config, anomaly_event_handlers, None,
        ['/model/Random'], 100, 10)
    analysis_context.register_component(
        match_value_average_change_detector,
        component_name="MatchValueAverageChange")
    atom_filters.add_handler(match_value_average_change_detector)

    # Write the sensor values to standard output, using b';' and b'' as the two byte-string arguments of the writer.
    import sys
    from aminer.analysis.MatchValueStreamWriter import MatchValueStreamWriter
    match_value_stream_writer = MatchValueStreamWriter(sys.stdout, [
        '/model/Sensors/CPUTemp', '/model/Sensors/CPUWorkload',
        '/model/Sensors/DTM'
    ], b';', b'')
    analysis_context.register_component(
        match_value_stream_writer, component_name="MatchValueStreamWriter")
    atom_filters.add_handler(match_value_stream_writer)

    from aminer.analysis.NewMatchPathValueComboDetector import NewMatchPathValueComboDetector
    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/IPAddresses/Username', '/model/IPAddresses/IP'],
        anomaly_event_handlers,
        auto_include_flag=False)
    analysis_context.register_component(
        new_match_path_value_combo_detector,
        component_name="NewMatchPathValueCombo")
    atom_filters.add_handler(new_match_path_value_combo_detector)

    # NOTE(review): none of the '/model/type/...' paths below corresponds to an element of parsing_model as defined in this
    # function, so this detector presumably never sees matching values with this model - confirm whether that is intended.
    from aminer.analysis.NewMatchIdValueComboDetector import NewMatchIdValueComboDetector
    new_match_id_value_combo_detector = NewMatchIdValueComboDetector(
        analysis_context.aminer_config,
        ['/model/type/path/name', '/model/type/syscall/syscall'],
        anomaly_event_handlers,
        id_path_list=['/model/type/path/id', '/model/type/syscall/id'],
        min_allowed_time_diff=5,
        auto_include_flag=True,
        allow_missing_values_flag=True,
        output_log_line=True)
    analysis_context.register_component(
        new_match_id_value_combo_detector,
        component_name="NewMatchIdValueComboDetector")
    atom_filters.add_handler(new_match_id_value_combo_detector)

    # NOTE(review): the DailyCron sequence above names its element 'JobNumber' (without a space), so the path
    # '/model/DailyCron/Job Number' presumably never matches - confirm whether the mismatch is intended.
    from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector
    new_match_path_value_detector = NewMatchPathValueDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/Job Number', '/model/IPAddresses/Username'],
        anomaly_event_handlers,
        auto_include_flag=False)
    analysis_context.register_component(new_match_path_value_detector,
                                        component_name="NewMatchPathValue")
    atom_filters.add_handler(new_match_path_value_detector)

    from aminer.analysis.MissingMatchPathValueDetector import MissingMatchPathValueDetector
    missing_match_path_value_detector = MissingMatchPathValueDetector(
        analysis_context.aminer_config, ['/model/DiskReport/Space'],
        anomaly_event_handlers,
        auto_include_flag=False,
        default_interval=2,
        realert_interval=5)
    analysis_context.register_component(missing_match_path_value_detector,
                                        component_name="MissingMatch")
    atom_filters.add_handler(missing_match_path_value_detector)

    from aminer.analysis.TimeCorrelationDetector import TimeCorrelationDetector
    time_correlation_detector = TimeCorrelationDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        2,
        min_rule_attributes=1,
        max_rule_attributes=5,
        record_count_before_event=70000,
        output_log_line=True)
    analysis_context.register_component(
        time_correlation_detector, component_name="TimeCorrelationDetector")
    atom_filters.add_handler(time_correlation_detector)

    # Correlate cron announcements (class A) with cron executions (class B); artefacts are matched on the JobNumber values.
    from aminer.analysis.TimeCorrelationViolationDetector import TimeCorrelationViolationDetector, CorrelationRule, EventClassSelector
    cron_job_announcement = CorrelationRule(
        'CronJobAnnouncement',
        5,
        6,
        max_artefacts_a_for_single_b=1,
        artefact_match_parameters=[('/model/CronAnnouncement/JobNumber',
                                    '/model/CronExecution/JobNumber')])
    a_class_selector = EventClassSelector('Announcement',
                                          [cron_job_announcement], None)
    b_class_selector = EventClassSelector('Execution', None,
                                          [cron_job_announcement])
    rules = [
        Rules.PathExistsMatchRule('/model/CronAnnouncement/Run',
                                  a_class_selector),
        Rules.PathExistsMatchRule('/model/CronExecution/Job', b_class_selector)
    ]

    time_correlation_violation_detector = TimeCorrelationViolationDetector(
        analysis_context.aminer_config, rules, anomaly_event_handlers)
    analysis_context.register_component(
        time_correlation_violation_detector,
        component_name="TimeCorrelationViolationDetector")
    atom_filters.add_handler(time_correlation_violation_detector)

    # Mail notification is only set up when a target address is present in the configuration properties.
    from aminer.events.DefaultMailNotificationEventHandler import DefaultMailNotificationEventHandler
    if DefaultMailNotificationEventHandler.CONFIG_KEY_MAIL_TARGET_ADDRESS in analysis_context.aminer_config.config_properties:
        mail_notification_handler = DefaultMailNotificationEventHandler(
            analysis_context)
        analysis_context.register_component(mail_notification_handler,
                                            component_name="MailHandler")
        anomaly_event_handlers.append(mail_notification_handler)
Beispiel #28
0
def get_model(user_name_model=None):
    """
    Return a model to parse a sshd information message after any standard logging preamble, e.g. from syslog.

    The returned SequenceModelElement "sshd" matches the fixed prefix b"sshd[", a decimal pid, the separator b"]: " and then one of
    the message variants listed in type_children (wrapped in the FirstMatchModelElement "msg").
    @param user_name_model optional model element used wherever a local user name is parsed. When None, a VariableByteDataModelElement
    "user" accepting lowercase letters, digits, "." and "-" is created. The very same instance is shared by all sequences using it.
    @return the SequenceModelElement describing a complete sshd log message.
    """
    if user_name_model is None:
        user_name_model = VariableByteDataModelElement(
            "user", b"0123456789abcdefghijklmnopqrstuvwxyz.-")

    # Separators occurring in many message variants; defined once so all sequences agree on the exact bytes.
    from_str = b" from "
    port = b" port "
    preauth = b" [preauth]"

    # NOTE: element ids are kept unique among siblings so that every element of a sequence yields a distinct match path.
    type_children = [
        SequenceModelElement("accepted key", [
            FixedDataModelElement("s0", b"Accepted publickey for "),
            user_name_model,
            FixedDataModelElement("s1", from_str),
            IpAddressDataModelElement("clientip"),
            FixedDataModelElement("s2", port),
            DecimalIntegerValueModelElement("port"),
            FixedDataModelElement("s3", b" ssh2: "),
            DelimitedDataModelElement(
                "asym-algorithm", b" ", consume_delimiter=True),
            VariableByteDataModelElement(
                "fingerprint",
                b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/:"
            ),
            # Optional certificate details following the key fingerprint.
            OptionalMatchModelElement(
                "opt",
                SequenceModelElement("seq", [
                    FixedDataModelElement("s4", b" ID "),
                    DelimitedDataModelElement("id", b" "),
                    FixedDataModelElement("s5", b" (serial "),
                    DecimalIntegerValueModelElement("serial"),
                    FixedDataModelElement("s6", b") CA "),
                    AnyByteDataModelElement("algorithm_details")
                ]))
        ]),
        SequenceModelElement("btmp-perm", [
            FixedDataModelElement(
                "s0",
                b"Excess permission or bad ownership on file /var/log/btmp")
        ]),
        SequenceModelElement("close-sess", [
            FixedDataModelElement("s0", b"Close session: user "),
            user_name_model,
            FixedDataModelElement("s1", from_str),
            IpAddressDataModelElement("clientip"),
            FixedDataModelElement("s2", port),
            DecimalIntegerValueModelElement("port"),
            FixedDataModelElement("s3", b" id "),
            DecimalIntegerValueModelElement("userid")
        ]),
        SequenceModelElement("closing", [
            FixedDataModelElement("s0", b"Closing connection to "),
            IpAddressDataModelElement("clientip"),
            FixedDataModelElement("s1", port),
            DecimalIntegerValueModelElement("port")
        ]),
        SequenceModelElement("closed", [
            FixedDataModelElement("s0", b"Connection closed by "),
            # Either a bare client address or the longer "<type> user <name> <address> port <n> [preauth]" form.
            FirstMatchModelElement("fm", [
                IpAddressDataModelElement("clientip"),
                SequenceModelElement("seq", [
                    FixedWordlistDataModelElement(
                        "user-type", [b"authenticating", b"invalid"]),
                    FixedDataModelElement("s1", b" user "),
                    DelimitedDataModelElement("user", b" "),
                    FixedDataModelElement("s2", b" "),
                    FirstMatchModelElement("fm", [
                        IpAddressDataModelElement("ip"),
                        IpAddressDataModelElement("ipv6", ipv6=True)
                    ]),
                    FixedDataModelElement("s3", port),
                    DecimalIntegerValueModelElement("port"),
                    FixedDataModelElement("s4", preauth)
                ])
            ])
        ]),
        SequenceModelElement("connect", [
            FixedDataModelElement("s0", b"Connection from "),
            IpAddressDataModelElement("clientip"),
            FixedDataModelElement("s1", port),
            DecimalIntegerValueModelElement("port"),
            FixedDataModelElement("s2", b" on "),
            IpAddressDataModelElement("serverip"),
            FixedDataModelElement("s3", port),
            DecimalIntegerValueModelElement("sport")
        ]),
        SequenceModelElement("disconnectreq", [
            FixedDataModelElement("s0", b"Received disconnect from "),
            IpAddressDataModelElement("clientip"),
            FixedDataModelElement("s1", port),
            DecimalIntegerValueModelElement("port"),
            FixedDataModelElement("s2", b":"),
            DecimalIntegerValueModelElement("session"),
            FixedDataModelElement("s3", b": "),
            FixedWordlistDataModelElement("reason", [b"disconnected by user"])
        ]),
        SequenceModelElement("disconnected", [
            FixedDataModelElement("s0", b"Disconnected from "),
            IpAddressDataModelElement("clientip"),
            FixedDataModelElement("s1", port),
            DecimalIntegerValueModelElement("port")
        ]),
        FixedDataModelElement("error-bind",
                              b"error: bind: Cannot assign requested address"),
        SequenceModelElement("error-max-auth", [
            FixedDataModelElement(
                "s0", b"error: maximum authentication attempts exceeded for "),
            OptionalMatchModelElement(
                "opt", FixedDataModelElement("invalid", b"invalid user ")),
            DelimitedDataModelElement("user", b" "),
            FixedDataModelElement("s1", from_str),
            FirstMatchModelElement("fm", [
                IpAddressDataModelElement("from_ip"),
                IpAddressDataModelElement("from_ip_v6", ipv6=True)
            ]),
            FixedDataModelElement("s2", port),
            DecimalIntegerValueModelElement("port"),
            FixedDataModelElement("s3", b" ssh2 [preauth]")
        ]),
        FixedDataModelElement("error-cert-exp",
                              b"error: Certificate invalid: expired"),
        FixedDataModelElement("error-cert-not-yet-valid",
                              b"error: Certificate invalid: not yet valid"),
        FixedDataModelElement(
            "error-cert-not-listed-principal",
            b"error: Certificate invalid: name is not a listed principal"),
        FixedDataModelElement("error-refused-by-options",
                              b"error: Refused by certificate options"),
        SequenceModelElement("error-channel-setup", [
            FixedDataModelElement(
                "s0",
                b"error: channel_setup_fwd_listener: cannot listen to port: "),
            DecimalIntegerValueModelElement("port")
        ]),
        SequenceModelElement("error-auth-key", [
            FixedDataModelElement("s0", b"error: Authentication key "),
            DelimitedDataModelElement("asym-algorithm", b" "),
            FixedDataModelElement("s1", b" "),
            DelimitedDataModelElement("hash", b" "),
            FixedDataModelElement("s2", b" revoked by file "),
            AnyByteDataModelElement("file")
        ]),
        SequenceModelElement("error-load-key", [
            FixedDataModelElement("s0", b"error: Could not load host key: "),
            AnyByteDataModelElement("file")
        ]),
        SequenceModelElement("ident-missing", [
            FixedDataModelElement(
                "s0", b"Did not receive identification string from "),
            IpAddressDataModelElement("clientip")
        ]),
        SequenceModelElement("invalid-user", [
            FixedDataModelElement("s0", b"Invalid user "),
            DelimitedDataModelElement("user", from_str),
            FixedDataModelElement("s1", from_str),
            FirstMatchModelElement("fm", [
                IpAddressDataModelElement("from_ip"),
                IpAddressDataModelElement("from_ip_v6", ipv6=True)
            ]),
            FixedDataModelElement("s2", port),
            DecimalIntegerValueModelElement("port")
        ]),
        SequenceModelElement("invalid-user-auth-req", [
            FixedDataModelElement("s0",
                                  b"input_userauth_request: invalid user "),
            DelimitedDataModelElement("user", preauth),
            FixedDataModelElement("s1", preauth)
        ]),
        SequenceModelElement("postppk", [
            FixedDataModelElement("s0", b"Postponed publickey for "),
            user_name_model,
            FixedDataModelElement("s1", from_str),
            IpAddressDataModelElement("clientip"),
            FixedDataModelElement("s2", port),
            DecimalIntegerValueModelElement("port"),
            FixedDataModelElement("s3", b" ssh2 [preauth]")
        ]),
        SequenceModelElement("readerr", [
            FixedDataModelElement("s0", b"Read error from remote host "),
            IpAddressDataModelElement("clientip"),
            FixedDataModelElement("s1", b": Connection timed out")
        ]),
        # Same leading text as "disconnectreq", but without a port and with the fixed code ": 11: ".
        SequenceModelElement("disconnect", [
            FixedDataModelElement("s0", b"Received disconnect from "),
            FirstMatchModelElement("fm", [
                IpAddressDataModelElement("from_ip"),
                IpAddressDataModelElement("from_ip_v6", ipv6=True)
            ]),
            FixedDataModelElement("s1", b": 11: "),
            FirstMatchModelElement("reason", [
                FixedDataModelElement("disconnected", b"disconnected by user"),
                SequenceModelElement("remotemsg", [
                    DelimitedDataModelElement("msg", preauth),
                    FixedDataModelElement("s0", preauth)
                ])
            ])
        ]),
        SequenceModelElement("signal", [
            FixedDataModelElement("s0", b"Received signal "),
            DecimalIntegerValueModelElement("signal"),
            FixedDataModelElement("s1", b"; terminating.")
        ]),
        SequenceModelElement("server", [
            FixedDataModelElement("s0", b"Server listening on "),
            DelimitedDataModelElement("serverip", b" "),
            FixedDataModelElement("s1", port),
            DecimalIntegerValueModelElement("port"),
            FixedDataModelElement("s2", b".")
        ]),
        SequenceModelElement("oom-adjust", [
            FixedDataModelElement("s0", b"Set /proc/self/oom_score_adj "),
            OptionalMatchModelElement(
                "from", FixedDataModelElement("default", b"from 0 ")),
            FixedDataModelElement("s1", b"to "),
            DecimalIntegerValueModelElement(
                "newval",
                value_sign_type=DecimalIntegerValueModelElement.
                SIGN_TYPE_OPTIONAL)
        ]),
        SequenceModelElement("session-start", [
            FixedDataModelElement("s0", b"Starting session: "),
            FirstMatchModelElement("sess-info", [
                SequenceModelElement("shell", [
                    FixedDataModelElement("s0", b"shell on "),
                    DelimitedDataModelElement("terminal", b" ")
                ]),
                SequenceModelElement(
                    "subsystem",
                    [FixedDataModelElement("s0", b"subsystem \"sftp\"")]),
                SequenceModelElement("forced-command", [
                    FixedDataModelElement("s0",
                                          b"forced-command (key-option) \""),
                    # Delimit at the closing quote followed by " for ", then consume only the quote here;
                    # the " for " part is matched by the element following the "sess-info" branch.
                    DelimitedDataModelElement("command", b"\" for "),
                    FixedDataModelElement("s1", b"\"")
                ])
            ]),
            FixedDataModelElement("s1", b" for "), user_name_model,
            FixedDataModelElement("s2", from_str),
            IpAddressDataModelElement("clientip"),
            FixedDataModelElement("s3", port),
            DecimalIntegerValueModelElement("port"),
            OptionalMatchModelElement(
                "idinfo",
                SequenceModelElement("idinfo", [
                    FixedDataModelElement("s0", b" id "),
                    DecimalIntegerValueModelElement("id")
                ]))
        ]),
        SequenceModelElement("transferred", [
            FixedDataModelElement("s0", b"Transferred: sent "),
            DecimalIntegerValueModelElement("sent"),
            FixedDataModelElement("s1", b", received "),
            DecimalIntegerValueModelElement("received"),
            # Was a second "s1"; renamed so both separators get distinct ids.
            FixedDataModelElement("s2", b" bytes")
        ]),
        SequenceModelElement("pam", [
            FixedDataModelElement("s0", b"pam_unix(sshd:session): session "),
            FixedWordlistDataModelElement("change", [b"opened", b"closed"]),
            FixedDataModelElement("s1", b" for user "), user_name_model,
            OptionalMatchModelElement(
                "openby", FixedDataModelElement("default", b" by (uid=0)"))
        ]),
        SequenceModelElement("child", [
            FixedDataModelElement("s0", b"User child is on pid "),
            DecimalIntegerValueModelElement("pid")
        ]),
        SequenceModelElement("failed/accept", [
            FixedWordlistDataModelElement("s0", [b"Failed ", b"Accepted "]),
            FixedWordlistDataModelElement(
                "type", [b"password", b"none", b"publickey"]),
            FixedDataModelElement("s1", b" for "),
            OptionalMatchModelElement(
                "opt", FixedDataModelElement("invalid", b"invalid user ")),
            DelimitedDataModelElement("user", b" "),
            FixedDataModelElement("s2", from_str),
            FirstMatchModelElement("fm", [
                IpAddressDataModelElement("from_ip"),
                IpAddressDataModelElement("from_ip_v6", ipv6=True)
            ]),
            FixedDataModelElement("s3", port),
            DecimalIntegerValueModelElement("port"),
            AnyByteDataModelElement("service")
        ]),
        SequenceModelElement("disconnecting", [
            FixedDataModelElement("s0", b"Disconnecting "),
            FixedWordlistDataModelElement("type",
                                          [b"authenticating", b"invalid"]),
            FixedDataModelElement("s1", b" user "),
            DelimitedDataModelElement("user", b" "),
            # The separators below were "s1", "s2", "s3"; renumbered to remove the duplicated "s1".
            FixedDataModelElement("s2", b" "),
            IpAddressDataModelElement("ip"),
            FixedDataModelElement("s3", port),
            DecimalIntegerValueModelElement("port"),
            FixedDataModelElement(
                "s4", b": Too many authentication failures [preauth]")
        ]),
        SequenceModelElement("fatal", [
            FixedDataModelElement(
                "s0", b"fatal: Timeout before authentication for "),
            IpAddressDataModelElement("ip"),
            FixedDataModelElement("s1", port),
            DecimalIntegerValueModelElement("port")
        ]),
        SequenceModelElement("cert-auth", [
            FixedDataModelElement("s0", b"cert: Authentication tried for "),
            DelimitedDataModelElement("user", b" "),
            FixedDataModelElement(
                "s1",
                b" with valid certificate but not from a permitted source address ("
            ),
            IpAddressDataModelElement("ip"),
            FixedDataModelElement("s2", b")."),
        ]),
        SequenceModelElement("change-root-dir", [
            FixedDataModelElement("s0", b"Changed root directory to \""),
            DelimitedDataModelElement("root-dir", b"\""),
            FixedDataModelElement("s1", b"\"")
        ]),
        FixedDataModelElement("subsystem-request",
                              b"subsystem request for sftp"),
        SequenceModelElement("conn-write-poll", [
            FixedDataModelElement("s0",
                                  b"packet_write_poll: Connection from "),
            IpAddressDataModelElement("from_ip"),
            FixedDataModelElement("s1", port),
            DecimalIntegerValueModelElement("port"),
            FixedDataModelElement("s2", b": Host is down")
        ]),
        # "debug<N>: " followed by one of the known debug message bodies.
        SequenceModelElement("debug", [
            FixedDataModelElement("s0", b"debug"),
            DecimalIntegerValueModelElement("debug-num"),
            FixedDataModelElement("s1", b": "),
            FirstMatchModelElement("fm", [
                SequenceModelElement("seq1", [
                    FixedDataModelElement("s2", b"Got "),
                    DecimalIntegerValueModelElement("num1"),
                    FixedDataModelElement("s3", b"/"),
                    DecimalIntegerValueModelElement("num2"),
                    FixedDataModelElement("s4", b" for keepalive")
                ]),
                SequenceModelElement("seq2", [
                    FixedDataModelElement("s2", b"channel "),
                    DecimalIntegerValueModelElement("channel-num"),
                    FixedDataModelElement("s3", b": request "),
                    DelimitedDataModelElement("mail", b" "),
                    FixedDataModelElement("s4", b" confirm "),
                    DecimalIntegerValueModelElement("num")
                ]),
                SequenceModelElement("seq3", [
                    FixedDataModelElement("s2", b"send packet: type "),
                    DecimalIntegerValueModelElement("packet-type")
                ]),
                SequenceModelElement("seq4", [
                    FixedDataModelElement("s2", b"receive packet: type "),
                    DecimalIntegerValueModelElement("packet-type")
                ]),
                FixedDataModelElement("do-cleanup", b"do_cleanup"),
                SequenceModelElement("seq5", [
                    FixedDataModelElement("s2",
                                          b"session_pty_cleanup: session "),
                    DecimalIntegerValueModelElement("sess-num"),
                    FixedDataModelElement("s3", b" release "),
                    AnyByteDataModelElement("file")
                ])
            ])
        ]),
        SequenceModelElement("pam_succeed_if", [
            FixedDataModelElement(
                "s0", b"pam_succeed_if(sshd:auth): requirement \"uid >= "),
            DecimalIntegerValueModelElement("uid"),
            FixedDataModelElement("s1", b"\" not met by user \""),
            DelimitedDataModelElement("user", b"\""),
            FixedDataModelElement("s2", b"\"")
        ]),
    ]

    # Wrap the message variants with the "sshd[<pid>]: " prefix common to all of them.
    model = SequenceModelElement("sshd", [
        FixedDataModelElement("sname", b"sshd["),
        DecimalIntegerValueModelElement("pid"),
        FixedDataModelElement("s0", b"]: "),
        FirstMatchModelElement("msg", type_children)
    ])
    return model
def build_analysis_pipeline(analysis_context):
    """
    Define the function to create pipeline for parsing the log data.
    It has also to define an AtomizerFactory to instruct aminer how to process incoming data streams to create log atoms from them.
    """
    date_format_string = b'%Y-%m-%d %H:%M:%S'
    cron = b' cron['

    # Build the parsing model:

    service_children_disk_report = [
        FixedDataModelElement('Space', b' Current Disk Data is: Filesystem     Type  Size  Used Avail Use%'),
        DelimitedDataModelElement('Data', b'%'), AnyByteDataModelElement('Rest')]

    service_children_login_details = [
        FixedDataModelElement('User/LoginDetails', b'User '), DelimitedDataModelElement('Username', b' '),
        FixedWordlistDataModelElement('Status', [b' logged in', b' logged out']),
        OptionalMatchModelElement('PastTime', SequenceModelElement('Time', [
            FixedDataModelElement('Blank', b' '), DecimalIntegerValueModelElement('Minutes'),
            FixedDataModelElement('Ago', b' minutes ago.')]))]

    service_children_cron_job = [
        DateTimeModelElement('DTM', date_format_string), FixedDataModelElement('UNameSpace1', b' '),
        DelimitedDataModelElement('UName', b' '), FixedDataModelElement('UNameSpace2', b' '), DelimitedDataModelElement('User', b' '),
        FixedDataModelElement('Cron', cron), DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Details', b']: Job `cron.daily` started.')]

    service_children_random_time = [FixedDataModelElement('Space', b'Random: '), DecimalIntegerValueModelElement('Random')]

    service_children_sensors = [SequenceModelElement('CPUTemp', [
        FixedDataModelElement('FixedTemp', b'CPU Temp: '), DecimalIntegerValueModelElement('Temp'),
        FixedDataModelElement('Degrees', b'\xc2\xb0C')]), FixedDataModelElement('Space1', b', '), SequenceModelElement('CPUWorkload', [
            FixedDataModelElement('FixedWorkload', b'CPU Workload: '), DecimalIntegerValueModelElement('Workload'),
            FixedDataModelElement('Percent', b'%')]), FixedDataModelElement('Space2', b', '),
        DateTimeModelElement('DTM', date_format_string)]

    service_children_user_ip_address = [
        FixedDataModelElement('User/UserIPAddress', b'User '), DelimitedDataModelElement('Username', b' '),
        FixedDataModelElement('Action', b' changed IP address to '), IpAddressDataModelElement('IP')]

    service_children_cron_job_announcement = [
        DateTimeModelElement('DTM', date_format_string), FixedDataModelElement('Space', b' '),
        DelimitedDataModelElement('UName', b' '), FixedDataModelElement('Cron', cron), DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Run', b']: Will run job `'),
        FixedWordlistDataModelElement('CronType', [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('StartTime', b'\' in 5 min.')]

    service_children_cron_job_execution = [
        DateTimeModelElement('DTM', date_format_string), FixedDataModelElement('Space1', b' '),
        DelimitedDataModelElement('UName', b' '), FixedDataModelElement('Cron', cron), DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Job', b']: Job `'),
        FixedWordlistDataModelElement('CronType', [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Started', b'\' started')]

    service_children_audit = [SequenceModelElement('path', [
        FixedDataModelElement('type', b'type=PATH '), FixedDataModelElement('msg_audit', b'msg=audit('),
        DelimitedDataModelElement('msg', b':'), FixedDataModelElement('placeholder', b':'), DecimalIntegerValueModelElement('id'),
        FixedDataModelElement('item_string', b'): item='), DecimalIntegerValueModelElement('item'),
        FixedDataModelElement('name_string', b' name="'), DelimitedDataModelElement('name', b'"'),
        FixedDataModelElement('inode_string', b'" inode='), DecimalIntegerValueModelElement('inode'),
        FixedDataModelElement('dev_string', b' dev='), DelimitedDataModelElement('dev', b' '),
        FixedDataModelElement('mode_string', b' mode='),
        DecimalIntegerValueModelElement('mode', value_pad_type=DecimalIntegerValueModelElement.PAD_TYPE_ZERO),
        FixedDataModelElement('ouid_string', b' ouid='), DecimalIntegerValueModelElement('ouid'),
        FixedDataModelElement('ogid_string', b' ogid='), DecimalIntegerValueModelElement('ogid'),
        FixedDataModelElement('rdev_string', b' rdev='), DelimitedDataModelElement('rdev', b' '),
        FixedDataModelElement('nametype_string', b' nametype='), FixedWordlistDataModelElement('nametype', [b'NORMAL', b'ERROR'])]),
        SequenceModelElement('syscall', [
            FixedDataModelElement('type', b'type=SYSCALL '), FixedDataModelElement('msg_audit', b'msg=audit('),
            DelimitedDataModelElement('msg', b':'), FixedDataModelElement('placeholder', b':'), DecimalIntegerValueModelElement('id'),
            FixedDataModelElement('arch_string', b'): arch='), DelimitedDataModelElement('arch', b' '),
            FixedDataModelElement('syscall_string', b' syscall='), DecimalIntegerValueModelElement('syscall'),
            FixedDataModelElement('success_string', b' success='), FixedWordlistDataModelElement('success', [b'yes', b'no']),
            FixedDataModelElement('exit_string', b' exit='), DecimalIntegerValueModelElement('exit'),
            AnyByteDataModelElement('remainding_data')])]

    service_children_parsing_model_element = [
        DateTimeModelElement('DateTimeModelElement', b'Current DateTime: %d.%m.%Y %H:%M:%S'),
        DecimalFloatValueModelElement('DecimalFloatValueModelElement', value_sign_type='optional'),
        DecimalIntegerValueModelElement('DecimalIntegerValueModelElement', value_sign_type='optional', value_pad_type='blank'),
        SequenceModelElement('se', [
            DelimitedDataModelElement('DelimitedDataModelElement', b';'), FixedDataModelElement('FixedDataModelElement', b';')])]

    # ElementValueBranchModelElement
    fixed_data_me1 = FixedDataModelElement("fixed1", b'match ')
    fixed_data_me2 = FixedDataModelElement("fixed2", b'fixed String')
    fixed_wordlist_data_model_element = FixedWordlistDataModelElement("wordlist", [b'data: ', b'string: '])
    decimal_integer_value_model_element = DecimalIntegerValueModelElement("decimal")

    service_children_parsing_model_element.append(
        ElementValueBranchModelElement('ElementValueBranchModelElement', FirstMatchModelElement("first", [
            SequenceModelElement("seq1", [fixed_data_me1, fixed_wordlist_data_model_element]),
            SequenceModelElement("seq2", [fixed_data_me1, fixed_wordlist_data_model_element, fixed_data_me2])]), "wordlist",
                                 {0: decimal_integer_value_model_element, 1: fixed_data_me2}))
    service_children_parsing_model_element.append(HexStringModelElement('HexStringModelElement'))
    service_children_parsing_model_element.append(SequenceModelElement('se2', [
        FixedDataModelElement('FixedDataModelElement', b'Gateway IP-Address: '), IpAddressDataModelElement('IpAddressDataModelElement')]))
    import locale
    loc = locale.getlocale()
    if loc == (None, None):
        loc = ('en_US', 'utf8')
    service_children_parsing_model_element.append(
        MultiLocaleDateTimeModelElement('MultiLocaleDateTimeModelElement', [(b'%b %d %Y', None, '%s.%s' % loc)]))
    service_children_parsing_model_element.append(
        RepeatedElementDataModelElement('RepeatedElementDataModelElement', SequenceModelElement('SequenceModelElement', [
            FixedDataModelElement('FixedDataModelElement', b'[drawn number]: '),
            DecimalIntegerValueModelElement('DecimalIntegerValueModelElement')]), 1))
    service_children_parsing_model_element.append(VariableByteDataModelElement('VariableByteDataModelElement', b'-@#'))
    service_children_parsing_model_element.append(SequenceModelElement('se', [
        WhiteSpaceLimitedDataModelElement('WhiteSpaceLimitedDataModelElement'), FixedDataModelElement('fixed', b' ')]))

    # The Base64StringModelElement must be just before the AnyByteDataModelElement to avoid unexpected Matches.
    service_children_parsing_model_element.append(Base64StringModelElement('Base64StringModelElement'))

    # The OptionalMatchModelElement must be paired with a FirstMatchModelElement because it accepts all data and thus no data gets
    # to the AnyByteDataModelElement. The AnyByteDataModelElement must be last, because all bytes are accepted.
    service_children_parsing_model_element.append(
        OptionalMatchModelElement('/', FirstMatchModelElement('FirstMatchModelElement//optional', [
            FixedDataModelElement('FixedDataModelElement', b'The-searched-element-was-found!'), SequenceModelElement('se', [
                FixedDataModelElement('FixedDME', b'Any:'), AnyByteDataModelElement('AnyByteDataModelElement')])])))

    alphabet = b'ghijkl'
    service_children_ecd = []
    for _, char in enumerate(alphabet):
        char = bytes([char])
        service_children_ecd.append(FixedDataModelElement(char.decode(), char))

    parsing_model = FirstMatchModelElement('model', [
        SequenceModelElement('CronAnnouncement', service_children_cron_job_announcement),
        SequenceModelElement('CronExecution', service_children_cron_job_execution),
        SequenceModelElement('DailyCron', service_children_cron_job), SequenceModelElement('DiskReport', service_children_disk_report),
        SequenceModelElement('LoginDetails', service_children_login_details), DecimalIntegerValueModelElement('Random'),
        SequenceModelElement('RandomTime', service_children_random_time), SequenceModelElement('Sensors', service_children_sensors),
        SequenceModelElement('IPAddresses', service_children_user_ip_address), FirstMatchModelElement('type', service_children_audit),
        FirstMatchModelElement('ECD', service_children_ecd), FirstMatchModelElement('ParsingME', service_children_parsing_model_element)])

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filter = AtomFilters.SubhandlerFilter(None)

    from aminer.analysis.TimestampCorrectionFilters import SimpleMonotonicTimestampAdjust
    simple_monotonic_timestamp_adjust = SimpleMonotonicTimestampAdjust([atom_filter])
    analysis_context.register_component(simple_monotonic_timestamp_adjust, component_name="SimpleMonotonicTimestampAdjust")

    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    from aminer.events.JsonConverterHandler import JsonConverterHandler
    stream_printer_event_handler = StreamPrinterEventHandler(analysis_context)
    json_converter_handler = JsonConverterHandler([stream_printer_event_handler], analysis_context)
    anomaly_event_handlers = [json_converter_handler]

    # Now define the AtomizerFactory using the model. A simple line based one is usually sufficient.
    from aminer.input.SimpleByteStreamLineAtomizerFactory import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(parsing_model, [simple_monotonic_timestamp_adjust],
                                                                            anomaly_event_handlers)

    # Just report all unparsed atoms to the event handlers.
    from aminer.analysis.UnparsedAtomHandlers import SimpleUnparsedAtomHandler, VerboseUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(anomaly_event_handlers)
    atom_filter.add_handler(simple_unparsed_atom_handler, stop_when_handled_flag=False)
    analysis_context.register_component(simple_unparsed_atom_handler, component_name="SimpleUnparsedHandler")

    verbose_unparsed_atom_handler = VerboseUnparsedAtomHandler(anomaly_event_handlers, parsing_model)
    atom_filter.add_handler(verbose_unparsed_atom_handler, stop_when_handled_flag=True)
    analysis_context.register_component(verbose_unparsed_atom_handler, component_name="VerboseUnparsedHandler")

    from aminer.analysis.TimestampsUnsortedDetector import TimestampsUnsortedDetector
    timestamps_unsorted_detector = TimestampsUnsortedDetector(analysis_context.aminer_config, anomaly_event_handlers)
    atom_filter.add_handler(timestamps_unsorted_detector)
    analysis_context.register_component(timestamps_unsorted_detector, component_name="TimestampsUnsortedDetector")

    from aminer.analysis import Rules
    from aminer.analysis.AllowlistViolationDetector import AllowlistViolationDetector
    allowlist_rules = [
        Rules.OrMatchRule([
            Rules.AndMatchRule([
                Rules.PathExistsMatchRule('/model/LoginDetails/PastTime/Time/Minutes'),
                Rules.NegationMatchRule(Rules.ValueMatchRule('/model/LoginDetails/Username', b'root')),
                Rules.DebugMatchRule(debug_match_result=True)]),
            Rules.AndMatchRule([
                Rules.NegationMatchRule(Rules.PathExistsMatchRule('/model/LoginDetails/PastTime/Time/Minutes')),
                Rules.PathExistsMatchRule('/model/LoginDetails'),
                Rules.DebugMatchRule(debug_match_result=True)]),
            Rules.NegationMatchRule(Rules.PathExistsMatchRule('/model/LoginDetails'))])]

    # This rule list should trigger, when the line does not look like: User root (logged in, logged out)
    # or User 'username' (logged in, logged out) x minutes ago.
    allowlist_violation_detector = AllowlistViolationDetector(analysis_context.aminer_config, allowlist_rules, anomaly_event_handlers,
                                                              output_log_line=True)
    analysis_context.register_component(allowlist_violation_detector, component_name="Allowlist")
    atom_filter.add_handler(allowlist_violation_detector)

    from aminer.analysis.ParserCount import ParserCount
    parser_count = ParserCount(analysis_context.aminer_config, None, anomaly_event_handlers, 10)
    analysis_context.register_component(parser_count, component_name="ParserCount")
    atom_filter.add_handler(parser_count)

    from aminer.analysis.EventTypeDetector import EventTypeDetector
    etd = EventTypeDetector(analysis_context.aminer_config, anomaly_event_handlers)
    analysis_context.register_component(etd, component_name="EventTypeDetector")
    atom_filter.add_handler(etd)

    from aminer.analysis.VariableTypeDetector import VariableTypeDetector
    vtd = VariableTypeDetector(analysis_context.aminer_config, anomaly_event_handlers, etd, silence_output_except_indicator=False,
                               output_log_line=False, ignore_list=["/model/RandomTime"])
    analysis_context.register_component(vtd, component_name="VariableTypeDetector")
    atom_filter.add_handler(vtd)

    from aminer.analysis.VariableCorrelationDetector import VariableCorrelationDetector
    vtd = VariableCorrelationDetector(analysis_context.aminer_config, anomaly_event_handlers, etd, disc_div_thres=0.5,
                                      ignore_list=["/model/RandomTime"])
    analysis_context.register_component(vtd, component_name="VariableCorrelationDetector")
    atom_filter.add_handler(vtd)

    from aminer.analysis.EventCorrelationDetector import EventCorrelationDetector
    ecd = EventCorrelationDetector(analysis_context.aminer_config, anomaly_event_handlers, check_rules_flag=True,
                                   hypothesis_max_delta_time=1.0)
    analysis_context.register_component(ecd, component_name="EventCorrelationDetector")
    atom_filter.add_handler(ecd)

    from aminer.analysis.EventFrequencyDetector import EventFrequencyDetector
    efd = EventFrequencyDetector(analysis_context.aminer_config, anomaly_event_handlers, window_size=0.1)
    analysis_context.register_component(efd, component_name="EventFrequencyDetector")
    atom_filter.add_handler(efd)

    from aminer.analysis.EventSequenceDetector import EventSequenceDetector
    esd = EventSequenceDetector(analysis_context.aminer_config, anomaly_event_handlers, ['/model/ParsingME'], ignore_list=[
        '/model/ECD/g', '/model/ECD/h', '/model/ECD/i', '/model/ECD/j', '/model/ECD/k', '/model/ECD/l', '/model/Random',
        '/model/RandomTime', '/model/DailyCron'])
    analysis_context.register_component(esd, component_name="EventSequenceDetector")
    atom_filter.add_handler(esd)

    from aminer.analysis.MatchFilter import MatchFilter
    match_filter = MatchFilter(analysis_context.aminer_config, ['/model/Random'], anomaly_event_handlers, target_value_list=[
        1, 10, 100], output_log_line=True)
    analysis_context.register_component(match_filter, component_name="MatchFilter")
    atom_filter.add_handler(match_filter)

    from aminer.analysis.NewMatchPathDetector import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(analysis_context.aminer_config, anomaly_event_handlers, auto_include_flag=True,
                                                   output_log_line=True)
    analysis_context.register_component(new_match_path_detector, component_name="NewMatchPath")
    atom_filter.add_handler(new_match_path_detector)

    def tuple_transformation_function(match_value_list):
        """Only allow output of the EnhancedNewMatchPathValueComboDetector after every 10th element."""
        extra_data = enhanced_new_match_path_value_combo_detector.known_values_dict.get(tuple(match_value_list))
        if extra_data is not None:
            mod = 10
            if (extra_data[2] + 1) % mod == 0:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = False
            else:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = True
        return match_value_list

    from aminer.analysis.EnhancedNewMatchPathValueComboDetector import EnhancedNewMatchPathValueComboDetector
    enhanced_new_match_path_value_combo_detector = EnhancedNewMatchPathValueComboDetector(analysis_context.aminer_config, [
        '/model/DailyCron/UName', '/model/DailyCron/JobNumber'], anomaly_event_handlers, auto_include_flag=True,
        tuple_transformation_function=tuple_transformation_function, output_log_line=True)
    analysis_context.register_component(enhanced_new_match_path_value_combo_detector, component_name="EnhancedNewValueCombo")
    atom_filter.add_handler(enhanced_new_match_path_value_combo_detector)

    import re
    ip_match_action = Rules.EventGenerationMatchAction(
        "Analysis.Rules.IPv4InRFC1918MatchRule", "Private IP address occurred!", anomaly_event_handlers)

    vdmt = Rules.ValueDependentModuloTimeMatchRule(None, 3, ["/model/ECD/j", "/model/ECD/k", "/model/ECD/l"], {b"e": [0, 2.95]}, [0, 3])
    mt = Rules.ModuloTimeMatchRule(None, 3, 0, 3, None)
    time_allowlist_rules = [
        Rules.AndMatchRule([
            Rules.ParallelMatchRule([
                Rules.ValueDependentDelegatedMatchRule([
                    '/model/ECD/g', '/model/ECD/h', '/model/ECD/i', '/model/ECD/j', '/model/ECD/k', '/model/ECD/l'], {
                        (b"a",): mt, (b"b",): mt, (b"c",): mt, (b"d",): vdmt, (b"e",): vdmt, (b"f",): vdmt, None: mt}, mt),
                Rules.IPv4InRFC1918MatchRule("/model/ParsingME/se2/IpAddressDataModelElement", ip_match_action),
                Rules.DebugHistoryMatchRule(debug_match_result=True)
            ]),
            # IP addresses 8.8.8.8, 8.8.4.4 and 10.0.0.0 - 10.255.255.255 are not allowed
            Rules.NegationMatchRule(Rules.ValueListMatchRule("/model/ParsingME/se2/IpAddressDataModelElement", [134744072, 134743044])),
            Rules.NegationMatchRule(Rules.ValueRangeMatchRule("/model/ParsingME/se2/IpAddressDataModelElement", 167772160, 184549375)),
            Rules.NegationMatchRule(Rules.StringRegexMatchRule("/model/type/syscall/success", re.compile(b"^no$")))
        ])
    ]
    time_allowlist_violation_detector = AllowlistViolationDetector(
        analysis_context.aminer_config, time_allowlist_rules, anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(time_allowlist_violation_detector, component_name="TimeAllowlist")
    atom_filter.add_handler(time_allowlist_violation_detector)

    from aminer.analysis.HistogramAnalysis import HistogramAnalysis, LinearNumericBinDefinition, ModuloTimeBinDefinition, \
        PathDependentHistogramAnalysis
    modulo_time_bin_definition = ModuloTimeBinDefinition(86400, 3600, 0, 1, 24, True)
    linear_numeric_bin_definition = LinearNumericBinDefinition(50, 5, 20, True)
    histogram_analysis = HistogramAnalysis(analysis_context.aminer_config, [
        ('/model/RandomTime/Random', modulo_time_bin_definition), ('/model/Random', linear_numeric_bin_definition)], 10,
        anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(histogram_analysis, component_name="HistogramAnalysis")
    atom_filter.add_handler(histogram_analysis)

    path_dependent_histogram_analysis = PathDependentHistogramAnalysis(
        analysis_context.aminer_config, '/model/RandomTime', modulo_time_bin_definition, 10, anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(path_dependent_histogram_analysis, component_name="PathDependentHistogramAnalysis")
    atom_filter.add_handler(path_dependent_histogram_analysis)

    from aminer.analysis.MatchValueAverageChangeDetector import MatchValueAverageChangeDetector
    match_value_average_change_detector = MatchValueAverageChangeDetector(analysis_context.aminer_config, anomaly_event_handlers, None, [
        '/model/Random'], 100, 10, output_log_line=True)
    analysis_context.register_component(match_value_average_change_detector, component_name="MatchValueAverageChange")
    atom_filter.add_handler(match_value_average_change_detector)

    import sys
    from aminer.analysis.MatchValueStreamWriter import MatchValueStreamWriter
    match_value_stream_writer = MatchValueStreamWriter(
        sys.stdout, ['/model/Sensors/CPUTemp', '/model/Sensors/CPUWorkload', '/model/Sensors/DTM'], b';', b'')
    analysis_context.register_component(match_value_stream_writer, component_name="MatchValueStreamWriter")
    atom_filter.add_handler(match_value_stream_writer)

    from aminer.analysis.NewMatchPathValueComboDetector import NewMatchPathValueComboDetector
    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        analysis_context.aminer_config, ['/model/IPAddresses/Username', '/model/IPAddresses/IP'],
        anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(new_match_path_value_combo_detector, component_name="NewMatchPathValueCombo")
    atom_filter.add_handler(new_match_path_value_combo_detector)

    from aminer.analysis.NewMatchIdValueComboDetector import NewMatchIdValueComboDetector
    new_match_id_value_combo_detector = NewMatchIdValueComboDetector(analysis_context.aminer_config, [
        '/model/type/path/name', '/model/type/syscall/syscall'], anomaly_event_handlers, id_path_list=[
        '/model/type/path/id', '/model/type/syscall/id'], min_allowed_time_diff=5, auto_include_flag=True, allow_missing_values_flag=True,
        output_log_line=True)
    analysis_context.register_component(new_match_id_value_combo_detector, component_name="NewMatchIdValueComboDetector")
    atom_filter.add_handler(new_match_id_value_combo_detector)

    from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector
    new_match_path_value_detector = NewMatchPathValueDetector(analysis_context.aminer_config, [
        '/model/DailyCron/JobNumber', '/model/IPAddresses/Username'], anomaly_event_handlers, auto_include_flag=True, output_log_line=True)
    analysis_context.register_component(new_match_path_value_detector, component_name="NewMatchPathValue")
    atom_filter.add_handler(new_match_path_value_detector)

    from aminer.analysis.MissingMatchPathValueDetector import MissingMatchPathValueDetector
    missing_match_path_value_detector = MissingMatchPathValueDetector(
        analysis_context.aminer_config, ['/model/DiskReport/Space'], anomaly_event_handlers, auto_include_flag=True, default_interval=2,
        realert_interval=5, output_log_line=True)
    analysis_context.register_component(missing_match_path_value_detector, component_name="MissingMatch")
    atom_filter.add_handler(missing_match_path_value_detector)

    from aminer.analysis.TimeCorrelationDetector import TimeCorrelationDetector
    time_correlation_detector = TimeCorrelationDetector(
        analysis_context.aminer_config, anomaly_event_handlers, 2, min_rule_attributes=1, max_rule_attributes=5,
        record_count_before_event=10000, output_log_line=True)
    analysis_context.register_component(time_correlation_detector, component_name="TimeCorrelationDetector")
    atom_filter.add_handler(time_correlation_detector)

    from aminer.analysis.TimeCorrelationViolationDetector import TimeCorrelationViolationDetector, CorrelationRule, EventClassSelector
    cron_job_announcement = CorrelationRule('CronJobAnnouncement', 5, 6, max_artefacts_a_for_single_b=1, artefact_match_parameters=[
        ('/model/CronAnnouncement/JobNumber', '/model/CronExecution/JobNumber')])
    a_class_selector = EventClassSelector('Announcement', [cron_job_announcement], None)
    b_class_selector = EventClassSelector('Execution', None, [cron_job_announcement])
    rules = [Rules.PathExistsMatchRule('/model/CronAnnouncement/Run', a_class_selector),
             Rules.PathExistsMatchRule('/model/CronExecution/Job', b_class_selector)]

    time_correlation_violation_detector = TimeCorrelationViolationDetector(analysis_context.aminer_config, rules, anomaly_event_handlers,
                                                                           output_log_line=True)
    analysis_context.register_component(time_correlation_violation_detector, component_name="TimeCorrelationViolationDetector")
    atom_filter.add_handler(time_correlation_violation_detector)
def build_analysis_pipeline(analysis_context):
    """
    Build the parsing model and the analysis pipeline and attach them to the analysis context.

    Besides the parsing model, this function also has to set analysis_context.atomizer_factory, which instructs aminer how to
    process incoming data streams to create log atoms from them.

    The mail notification handler is optional: it is only created, registered and added to the anomaly event handlers when a
    mail target address is present in the configuration properties.
    """
    date_format_string = b'%Y-%m-%d %H:%M:%S'
    cron = b' cron['

    # Build the parsing model:

    service_children_disk_report = [
        FixedDataModelElement(
            'Space',
            b' Current Disk Data is: Filesystem     Type  Size  Used Avail Use%'
        ),
        DelimitedDataModelElement('Data', b'%'),
        AnyByteDataModelElement('Rest')
    ]

    service_children_login_details = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedWordlistDataModelElement('Status',
                                      [b' logged in', b' logged out']),
        OptionalMatchModelElement(
            'PastTime',
            SequenceModelElement('Time', [
                FixedDataModelElement('Blank', b' '),
                DecimalIntegerValueModelElement('Minutes'),
                FixedDataModelElement('Ago', b' minutes ago.')
            ]))
    ]

    service_children_cron_job = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('UNameSpace1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('UNameSpace2', b' '),
        DelimitedDataModelElement('User', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Details', b']: Job `cron.daily` started.')
    ]

    service_children_random_time = [
        FixedDataModelElement('Space', b'Random: '),
        DecimalIntegerValueModelElement('Random')
    ]

    service_children_sensors = [
        SequenceModelElement('CPUTemp', [
            FixedDataModelElement('FixedTemp', b'CPU Temp: '),
            DecimalIntegerValueModelElement('Temp'),
            # UTF-8 encoded degree sign followed by 'C'.
            FixedDataModelElement('Degrees', b'\xc2\xb0C')
        ]),
        FixedDataModelElement('Space1', b', '),
        SequenceModelElement('CPUWorkload', [
            FixedDataModelElement('Fixed Workload', b'CPU Workload: '),
            DecimalIntegerValueModelElement('Workload'),
            FixedDataModelElement('Percent', b'%')
        ]),
        FixedDataModelElement('Space2', b', '),
        DateTimeModelElement('DTM', date_format_string)
    ]

    service_children_user_ip_address = [
        FixedDataModelElement('User', b'User '),
        DelimitedDataModelElement('Username', b' '),
        FixedDataModelElement('Action', b' changed IP address to '),
        IpAddressDataModelElement('IP')
    ]

    service_children_cron_job_announcement = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('Space', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Run', b']: Will run job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Start Time', b'\' in 5 min.')
    ]

    service_children_cron_job_execution = [
        DateTimeModelElement('DTM', date_format_string),
        FixedDataModelElement('Space1', b' '),
        DelimitedDataModelElement('UName', b' '),
        FixedDataModelElement('Cron', cron),
        DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Job', b']: Job `'),
        FixedWordlistDataModelElement(
            'CronType',
            [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Started', b'\' started')
    ]

    # Alternatives of the 'ParsingME' branch. They are tried in list order by the enclosing FirstMatchModelElement,
    # so the order of the entries (including the ones appended below) matters.
    service_children_parsing_model_element = [
        DateTimeModelElement('DateTimeModelElement',
                             b'Current DateTime: %d.%m.%Y %H:%M:%S'),
        DecimalFloatValueModelElement('DecimalFloatValueModelElement',
                                      value_sign_type='optional'),
        DecimalIntegerValueModelElement('DecimalIntegerValueModelElement',
                                        value_sign_type='optional',
                                        value_pad_type='blank'),
        SequenceModelElement('', [
            DelimitedDataModelElement('DelimitedDataModelElement', b';'),
            FixedDataModelElement('FixedDataModelElement', b';')
        ])
    ]

    # ElementValueBranchModelElement
    fixed_data_me1 = FixedDataModelElement("fixed1", b'match ')
    fixed_data_me2 = FixedDataModelElement("fixed2", b'fixed String')
    fixed_wordlist_data_model_element = FixedWordlistDataModelElement(
        "wordlist", [b'data: ', b'string: '])
    decimal_integer_value_model_element = DecimalIntegerValueModelElement(
        "decimal")

    service_children_parsing_model_element.append(
        ElementValueBranchModelElement(
            'ElementValueBranchModelElement',
            FirstMatchModelElement("first", [
                SequenceModelElement(
                    "seq1",
                    [fixed_data_me1, fixed_wordlist_data_model_element]),
                SequenceModelElement("seq2", [
                    fixed_data_me1, fixed_wordlist_data_model_element,
                    fixed_data_me2
                ])
            ]), "wordlist", {
                0: decimal_integer_value_model_element,
                1: fixed_data_me2
            }))
    service_children_parsing_model_element.append(
        HexStringModelElement('HexStringModelElement'))
    service_children_parsing_model_element.append(
        SequenceModelElement('', [
            FixedDataModelElement('FixedDataModelElement',
                                  b'Gateway IP-Address: '),
            IpAddressDataModelElement('IpAddressDataModelElement')
        ]))
    service_children_parsing_model_element.append(
        MultiLocaleDateTimeModelElement('MultiLocaleDateTimeModelElement',
                                        [(b'%b %d %Y', "de_AT.utf8", None)]))
    service_children_parsing_model_element.append(
        RepeatedElementDataModelElement(
            'RepeatedElementDataModelElement',
            SequenceModelElement('SequenceModelElement', [
                FixedDataModelElement('FixedDataModelElement',
                                      b'drawn number: '),
                DecimalIntegerValueModelElement(
                    'DecimalIntegerValueModelElement')
            ]), 1))
    service_children_parsing_model_element.append(
        VariableByteDataModelElement('VariableByteDataModelElement', b'-@#'))
    service_children_parsing_model_element.append(
        SequenceModelElement('', [
            WhiteSpaceLimitedDataModelElement(
                'WhiteSpaceLimitedDataModelElement'),
            FixedDataModelElement('', b' ')
        ]))

    # The Base64StringModelElement must be just before the AnyByteDataModelElement to avoid unexpected Matches.
    service_children_parsing_model_element.append(
        Base64StringModelElement('Base64StringModelElement'))

    # The OptionalMatchModelElement must be paired with a FirstMatchModelElement because it accepts all data and thus no data gets
    # to the AnyByteDataModelElement. The AnyByteDataModelElement must be last, because all bytes are accepted.
    service_children_parsing_model_element.append(
        OptionalMatchModelElement(
            'OptionalMatchModelElement',
            FirstMatchModelElement('FirstMatchModelElement', [
                FixedDataModelElement('FixedDataModelElement',
                                      b'The-searched-element-was-found!'),
                AnyByteDataModelElement('AnyByteDataModelElement')
            ])))

    # Root of the parsing model; all match paths used by the detectors below start with '/model'.
    parsing_model = FirstMatchModelElement('model', [
        SequenceModelElement('CronAnnouncement',
                             service_children_cron_job_announcement),
        SequenceModelElement('CronExecution',
                             service_children_cron_job_execution),
        SequenceModelElement('DailyCron', service_children_cron_job),
        SequenceModelElement('DiskReport', service_children_disk_report),
        SequenceModelElement('LoginDetails', service_children_login_details),
        DecimalIntegerValueModelElement('Random'),
        SequenceModelElement('RandomTime', service_children_random_time),
        SequenceModelElement('Sensors', service_children_sensors),
        SequenceModelElement('IPAddresses', service_children_user_ip_address),
        FirstMatchModelElement('ParsingME',
                               service_children_parsing_model_element)
    ])

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers
    # later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filter = AtomFilters.SubhandlerFilter(None)

    from aminer.analysis.TimestampCorrectionFilters import SimpleMonotonicTimestampAdjust
    simple_monotonic_timestamp_adjust = SimpleMonotonicTimestampAdjust(
        [atom_filter])
    analysis_context.register_component(
        simple_monotonic_timestamp_adjust,
        component_name="SimpleMonotonicTimestampAdjust")

    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    stream_printer_event_handler = StreamPrinterEventHandler(analysis_context)
    from aminer.events.SyslogWriterEventHandler import SyslogWriterEventHandler
    syslog_event_handler = SyslogWriterEventHandler(analysis_context)
    anomaly_event_handlers = [
        stream_printer_event_handler, syslog_event_handler
    ]

    # The mail handler is optional. It must only be added to the handler list inside this branch, otherwise the name
    # would be unbound whenever no mail target address is configured.
    from aminer.events.DefaultMailNotificationEventHandler import DefaultMailNotificationEventHandler
    if DefaultMailNotificationEventHandler.CONFIG_KEY_MAIL_TARGET_ADDRESS in analysis_context.aminer_config.config_properties:
        mail_notification_handler = DefaultMailNotificationEventHandler(
            analysis_context)
        analysis_context.register_component(mail_notification_handler,
                                            component_name="MailHandler")
        anomaly_event_handlers.append(mail_notification_handler)

    # Now define the AtomizerFactory using the model. A simple line based one is usually sufficient.
    from aminer.input.SimpleByteStreamLineAtomizerFactory import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(
        parsing_model, [simple_monotonic_timestamp_adjust],
        anomaly_event_handlers)

    # Just report all unparsed atoms to the event handlers.
    from aminer.analysis.UnparsedAtomHandlers import SimpleUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(
        anomaly_event_handlers)
    atom_filter.add_handler(simple_unparsed_atom_handler,
                            stop_when_handled_flag=True)
    analysis_context.register_component(simple_unparsed_atom_handler,
                                        component_name="UnparsedHandler")

    from aminer.analysis.TimestampsUnsortedDetector import TimestampsUnsortedDetector
    timestamps_unsorted_detector = TimestampsUnsortedDetector(
        analysis_context.aminer_config, anomaly_event_handlers)
    atom_filter.add_handler(timestamps_unsorted_detector)
    analysis_context.register_component(
        timestamps_unsorted_detector,
        component_name="TimestampsUnsortedDetector")

    from aminer.analysis import Rules
    from aminer.analysis.AllowlistViolationDetector import AllowlistViolationDetector
    allowlist_rules = [
        Rules.OrMatchRule([
            Rules.AndMatchRule([
                Rules.PathExistsMatchRule(
                    '/model/LoginDetails/PastTime/Time/Minutes'),
                Rules.NegationMatchRule(
                    Rules.ValueMatchRule('/model/LoginDetails/Username',
                                         b'root'))
            ]),
            Rules.AndMatchRule([
                Rules.NegationMatchRule(
                    Rules.PathExistsMatchRule(
                        '/model/LoginDetails/PastTime/Time/Minutes')),
                Rules.PathExistsMatchRule('/model/LoginDetails')
            ]),
            Rules.NegationMatchRule(
                Rules.PathExistsMatchRule('/model/LoginDetails'))
        ])
    ]

    # This rule list should trigger, when the line does not look like: User root (logged in, logged out)
    # or User 'username' (logged in, logged out) x minutes ago.
    allowlist_violation_detector = AllowlistViolationDetector(
        analysis_context.aminer_config, allowlist_rules,
        anomaly_event_handlers)
    analysis_context.register_component(allowlist_violation_detector,
                                        component_name="Allowlist")
    atom_filter.add_handler(allowlist_violation_detector)

    from aminer.analysis.NewMatchPathDetector import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_detector,
                                        component_name="NewMatchPath")
    atom_filter.add_handler(new_match_path_detector)

    def tuple_transformation_function(match_value_list):
        """Only allow output of the EnhancedNewMatchPathValueComboDetector after every 10000th element."""
        # The detector is assigned further below; the name is resolved when this function is called, not when it is defined.
        extra_data = enhanced_new_match_path_value_combo_detector.known_values_dict.get(
            tuple(match_value_list))
        if extra_data is not None:
            mod = 10000
            if (extra_data[2] + 1) % mod == 0:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = False
            else:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = True
        return match_value_list

    from aminer.analysis.EnhancedNewMatchPathValueComboDetector import EnhancedNewMatchPathValueComboDetector
    enhanced_new_match_path_value_combo_detector = EnhancedNewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/UName', '/model/DailyCron/JobNumber'],
        anomaly_event_handlers,
        auto_include_flag=True,
        tuple_transformation_function=tuple_transformation_function)
    analysis_context.register_component(
        enhanced_new_match_path_value_combo_detector,
        component_name="EnhancedNewValueCombo")
    atom_filter.add_handler(enhanced_new_match_path_value_combo_detector)

    from aminer.analysis.HistogramAnalysis import HistogramAnalysis, LinearNumericBinDefinition, ModuloTimeBinDefinition, \
        PathDependentHistogramAnalysis
    modulo_time_bin_definition = ModuloTimeBinDefinition(
        86400, 3600, 0, 1, 24, True)
    linear_numeric_bin_definition = LinearNumericBinDefinition(50, 5, 20, True)
    histogram_analysis = HistogramAnalysis(
        analysis_context.aminer_config,
        [('/model/RandomTime/Random', modulo_time_bin_definition),
         ('/model/Random', linear_numeric_bin_definition)], 10,
        anomaly_event_handlers)
    analysis_context.register_component(histogram_analysis,
                                        component_name="HistogramAnalysis")
    atom_filter.add_handler(histogram_analysis)

    path_dependent_histogram_analysis = PathDependentHistogramAnalysis(
        analysis_context.aminer_config, '/model/RandomTime',
        modulo_time_bin_definition, 10, anomaly_event_handlers)
    analysis_context.register_component(
        path_dependent_histogram_analysis,
        component_name="PathDependentHistogramAnalysis")
    atom_filter.add_handler(path_dependent_histogram_analysis)

    from aminer.analysis.MatchValueAverageChangeDetector import MatchValueAverageChangeDetector
    match_value_average_change_detector = MatchValueAverageChangeDetector(
        analysis_context.aminer_config, anomaly_event_handlers, None,
        ['/model/Random'], 100, 10)
    analysis_context.register_component(
        match_value_average_change_detector,
        component_name="MatchValueAverageChange")
    atom_filter.add_handler(match_value_average_change_detector)

    import sys
    from aminer.analysis.MatchValueStreamWriter import MatchValueStreamWriter
    match_value_stream_writer = MatchValueStreamWriter(sys.stdout, [
        '/model/Sensors/CPUTemp', '/model/Sensors/CPUWorkload',
        '/model/Sensors/DTM'
    ], b';', b'')
    analysis_context.register_component(
        match_value_stream_writer, component_name="MatchValueStreamWriter")
    atom_filter.add_handler(match_value_stream_writer)

    from aminer.analysis.NewMatchPathValueComboDetector import NewMatchPathValueComboDetector
    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        analysis_context.aminer_config,
        ['/model/IPAddresses/Username', '/model/IPAddresses/IP'],
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(
        new_match_path_value_combo_detector,
        component_name="NewMatchPathValueCombo")
    atom_filter.add_handler(new_match_path_value_combo_detector)

    from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector
    new_match_path_value_detector = NewMatchPathValueDetector(
        analysis_context.aminer_config,
        ['/model/DailyCron/JobNumber', '/model/IPAddresses/Username'],
        anomaly_event_handlers,
        auto_include_flag=True)
    analysis_context.register_component(new_match_path_value_detector,
                                        component_name="NewMatchPathValue")
    atom_filter.add_handler(new_match_path_value_detector)

    from aminer.analysis.MissingMatchPathValueDetector import MissingMatchPathValueDetector
    missing_match_path_value_detector = MissingMatchPathValueDetector(
        analysis_context.aminer_config, ['/model/DiskReport/Space'],
        anomaly_event_handlers,
        auto_include_flag=True,
        default_interval=2,
        realert_interval=5)
    analysis_context.register_component(missing_match_path_value_detector,
                                        component_name="MissingMatch")
    atom_filter.add_handler(missing_match_path_value_detector)

    from aminer.analysis.TimeCorrelationDetector import TimeCorrelationDetector
    time_correlation_detector = TimeCorrelationDetector(
        analysis_context.aminer_config,
        anomaly_event_handlers,
        2,
        min_rule_attributes=1,
        max_rule_attributes=5,
        record_count_before_event=70000,
        output_log_line=True)
    analysis_context.register_component(
        time_correlation_detector, component_name="TimeCorrelationDetector")
    atom_filter.add_handler(time_correlation_detector)

    from aminer.analysis.TimeCorrelationViolationDetector import TimeCorrelationViolationDetector, CorrelationRule, EventClassSelector
    cron_job_announcement = CorrelationRule(
        'CronJobAnnouncement',
        5,
        6,
        max_artefacts_a_for_single_b=1,
        artefact_match_parameters=[('/model/CronAnnouncement/JobNumber',
                                    '/model/CronExecution/JobNumber')])
    a_class_selector = EventClassSelector('Announcement',
                                          [cron_job_announcement], None)
    b_class_selector = EventClassSelector('Execution', None,
                                          [cron_job_announcement])
    rules = [
        Rules.PathExistsMatchRule('/model/CronAnnouncement/Run',
                                  a_class_selector),
        Rules.PathExistsMatchRule('/model/CronExecution/Job', b_class_selector)
    ]

    time_correlation_violation_detector = TimeCorrelationViolationDetector(
        analysis_context.aminer_config, rules, anomaly_event_handlers)
    analysis_context.register_component(
        time_correlation_violation_detector,
        component_name="TimeCorrelationViolationDetector")
    atom_filter.add_handler(time_correlation_violation_detector)