예제 #1
0
def main():
    """Command-line entry point: extract one EVTX record and pretty print it.

    Takes two positional arguments, the path to the EVTX file and the number
    of the record to extract.  The record's XML view is prefixed with an XML
    declaration and printed through ``prettify_xml``.  If prettifying raises
    ``ExpatError``, the exception is reported and the unformatted document is
    printed instead.

    Raises:
        RuntimeError: if ``get_record`` returns ``None`` for the given number.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Extract a single EVTX record and pretty print it.")
    parser.add_argument("evtx", type=str, help="Path to the Windows EVTX file")
    parser.add_argument("record",
                        type=int,
                        help="The record number of the record to extract")
    args = parser.parse_args()

    with Evtx(args.evtx) as evtx:
        record = evtx.get_record(args.record)
        if record is None:
            raise RuntimeError("Cannot find the record specified.")

        # Build the document once so the fallback prints exactly the text
        # that failed to prettify.
        document = (
            "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?>\n%s"
            % evtx_record_xml_view(record))

        # print(x) with a single argument behaves the same on Python 2 and 3;
        # the print statement form is a SyntaxError on Python 3.
        try:
            print(prettify_xml(document))
        except ExpatError as e:
            print("Exception: ")
            print(repr(e))
            print("")
            print("")
            print(document)
예제 #2
0
def xml_records(filename):
    """Yield one ``(result, error)`` pair per record of an EVTX file.

    Args:
        filename: path handed to ``Evtx``.

    Yields:
        ``(to_lxml(xml), None)`` when the record's XML parses, otherwise
        ``(xml, exception)`` where ``xml`` is the unparsed XML and
        ``exception`` is the ``etree.XMLSyntaxError`` that was raised.
    """
    with Evtx(filename) as evtx:
        for xml, record in evtx_file_xml_view(evtx.get_file_header()):
            try:
                yield to_lxml(xml), None
            except etree.XMLSyntaxError as e:
                # Always yield a 2-tuple: the previous third element, `fh`,
                # was never defined here and raised NameError on this path.
                yield xml, e
예제 #3
0
def ParseEvtx(files):
    """Append every record of an EVTX file, as XML, to the results log.

    The file is read twice: once through ``Evtx`` to count the records (the
    count is only used for the progress bar) and once through a read-only
    memory map to produce the XML.  Output is appended to
    ``..\\RESULTS\\EventLog.txt`` as an XML declaration followed by a single
    ``<Events>`` element wrapping the records.  A progress bar is redrawn on
    stdout after each record.

    Args:
        files: path of the EVTX file to convert (a single path, despite the
            plural name).
    """
    bar_len = 55  # width of the progress bar in characters

    with Evtx(files) as evtx:
        total = sum(1 for _ in evtx.records())

    # Context managers guarantee the output file is flushed and closed even
    # if parsing fails part-way through.
    with open("..\\RESULTS\\EventLog.txt", "a+") as writefile:
        # The content is binary; only the file descriptor is used by mmap.
        with open(files, 'rb') as f:
            with contextlib.closing(
                    mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)) as buf:
                fh = FileHeader(buf, 0x0)
                writefile.write(
                    "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?>"
                )
                writefile.write("<Events>")
                count = 0
                for xml, _record in evtx_file_xml_view(fh):
                    count += 1
                    writefile.write(ascii(xml))
                    filled_len = int(round(bar_len * count / float(total)))
                    percents = round(100.0 * count / float(total), 1)
                    bar = '=' * filled_len + '-' * (bar_len - filled_len)
                    sys.stdout.write('[%s] %s%s %s/%s \r' %
                                     (bar, percents, '%', count, total))
                    sys.stdout.flush()
                # Close the root element exactly once, after the last record;
                # writing it inside the loop produced malformed XML.
                writefile.write("</Events>")
    # Move past the progress line.  A bare `print` is a no-op expression on
    # Python 3, so the two newlines are written explicitly.
    sys.stdout.write("\n\n")
예제 #4
0
def xml_records(filename):
    """Yield ``(result, error)`` pairs for the records stored in *filename*.

    The backend is selected by ``OPTIONS.alternate``: when it is truthy the
    file is read through ``Evtx`` / ``evtx_file_xml_view``, otherwise through
    ``PyEvtxParser``.

    Yields:
        ``(element, None)`` for a record whose XML parsed,
        ``(raw_xml, exception)`` when ``ef.to_lxml`` raised
        ``etree.XMLSyntaxError``, and a final ``(None, exception)`` when any
        other exception interrupted the iteration (it is logged first and the
        generator then ends).
    """
    if not OPTIONS.alternate:
        reader = PyEvtxParser(filename)
        try:
            for entry in reader.records():
                try:
                    yield ef.to_lxml(entry['data']), None
                except etree.XMLSyntaxError as err:
                    yield entry['data'], err
        except Exception as err:  # UnicodeDecodeError, AttributeError, RuntimeError
            logging.error(err)
            yield None, err
        return

    with Evtx(filename) as log:
        try:
            for raw_xml, _record in evtx_file_xml_view(log.get_file_header()):
                try:
                    yield ef.to_lxml(raw_xml), None
                except etree.XMLSyntaxError as err:
                    yield raw_xml, err
        except BinaryParser.OverrunBufferException as err:
            logging.error("Overrun Buffer Exception!")
            yield None, err
        except BinaryParser.ParseException as err:
            logging.error("Parse Exception!")
            yield None, err
        except Exception as err:  # UnicodeDecodeError, AttributeError
            logging.error(err)
            yield None, err
예제 #5
0
def xml_records(filename):
    """Yield ``(result, error)`` pairs from an EVTX file or an XML export.

    Two independent module-level flags select the input handling; both
    branches run if both flags are set:

    * ``args.evtx``: read *filename* through ``Evtx``.
    * ``args.xmls``: read *filename* as text, strip the XML declaration and
      the ``<Events>`` wrapper, split the remainder on the opening
      ``<Event xmlns=...>`` tag and parse each fragment that starts with
      ``<System>``.

    Yields:
        ``(element, None)`` on success, or ``(raw_xml, exception)`` when
        ``to_lxml`` raised ``etree.XMLSyntaxError``.
    """
    if args.evtx:
        with Evtx(filename) as log:
            for raw_xml, _record in evtx_file_xml_view(log.get_file_header()):
                try:
                    yield to_lxml(raw_xml), None
                except etree.XMLSyntaxError as err:
                    yield raw_xml, err

    if args.xmls:
        with open(filename, 'r') as source:
            text = source.read()
            # Remove the document-level wrappers so only <Event> bodies remain.
            for wrapper in (
                    "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>",
                    "</Events>",
                    "<Events>"):
                text = text.replace(wrapper, "")
            fragments = re.split(
                "<Event xmlns=[\'\"]http://schemas.microsoft.com/win/2004/08/events/event[\'\"]>",
                text)
            # Drop the full-text copy before iterating the fragments.
            del text
            for fragment in fragments:
                if not fragment.startswith("<System>"):
                    continue
                try:
                    # The split consumed the opening tag; put a plain one back.
                    yield to_lxml("<Event>" + fragment), None
                except etree.XMLSyntaxError as err:
                    yield fragment, err
예제 #6
0
def main():
    """Command-line entry point for the process tree analyzer.

    Loads analyzer state either from a serialized ``.pt`` file or by
    analyzing the entries of an EVTX log with event IDs 4688, 4689, 1 and 5,
    then runs one sub-command:

    * ``summary``: summarize the analyzer's processes.
    * ``ts``: draw every root tree when the argument is ``all``; any other
      value is reported as unsupported.
    * ``serialize``: write the analyzer state to the given ``.pt`` file.

    Anything else is logged as an unknown command.
    """
    import argparse
    logging.basicConfig(level=logging.DEBUG)
    logging.getLogger("iso8601.iso8601").setLevel(logging.WARNING)

    cli = argparse.ArgumentParser(
        description="Print the record numbers of EVTX log entries "
        "that match the given EID.")
    cli.add_argument("input_file",
                     type=str,
                     help="Path to the Windows EVTX file or .pt file")

    commands = cli.add_subparsers(dest="cmd")

    commands.add_parser("ts").add_argument(
        "ts",
        type=str,
        default="",
        help="iso8601 timestamp with which to filter")

    commands.add_parser("summary")

    commands.add_parser("serialize").add_argument(
        "pt",
        type=str,
        default="state.pt",
        help=".pt file to serialize parsed trees")

    args = cli.parse_args()

    analyzer = ProcessTreeAnalyzer()
    source_path = args.input_file
    if not source_path.lower().endswith(".pt"):
        g_logger.info("using evtx log file")
        with Evtx(source_path) as evtx:
            wanted = set([4688, 4689, 1, 5])
            analyzer.analyze(get_entries_with_eids(evtx, wanted))
    else:
        g_logger.info("using serialized file")
        with open(source_path, "rb") as f:
            analyzer.deserialize(f)

    command = args.cmd
    if command == "summary":
        summarize_processes(analyzer.get_processes())
        return

    if command == "ts":
        if args.ts != "all":
            g_logger.error("query trees not yet supported")
            return
        for root in analyzer.get_roots():
            draw_tree(analyzer, root)
        return

    if command == "serialize":
        if args.pt.lower().endswith(".pt"):
            with open(args.pt, "wb") as f:
                analyzer.serialize(f)
        else:
            g_logger.error("serialize output file must have .pt extension")
        return

    g_logger.error("unknown command: %s", command)
예제 #7
0
def xml_records(filename):
    """Yield ``(result, error)`` pairs for each record in *filename*.

    A debug message is logged before each parse attempt.  A record whose XML
    parses yields ``(element, None)``; one that raises
    ``etree.XMLSyntaxError`` has the error logged and yields
    ``(raw_xml, exception)``.
    """
    with Evtx(filename) as log:
        header = log.get_file_header()
        for raw_xml, _record in evtx_file_xml_view(header):
            try:
                logger.debug("Yielding XML")
                parsed = to_lxml(raw_xml)
                yield parsed, None
            except etree.XMLSyntaxError as err:
                logger.error(err)
                yield raw_xml, err
예제 #8
0
def xml_records(filename):
    """Yield ``(result, error)`` pairs for each record in *filename*.

    Each record's XML is parsed with ``etree.fromstring``.  Success yields
    ``(element, None)``; an ``etree.XMLSyntaxError`` yields
    ``(raw_xml, exception)``.

    An ``IOError`` raised anywhere while the file is open (opening it or
    iterating it) writes an error message to stderr and exits the process
    with status 2.
    """
    try:
        with Evtx(filename) as log:
            header = log.get_file_header()
            for raw_xml, _record in evtx_file_xml_view(header):
                try:
                    yield etree.fromstring(raw_xml), None
                except etree.XMLSyntaxError as err:
                    yield raw_xml, err
    except IOError:
        sys.stderr.write("Error: Cannot open file {}\n".format(filename))
        sys.exit(2)
예제 #9
0
def main():
    """Command-line entry point: print the structure of a record's template.

    Takes two positional arguments, the path to the EVTX file and the record
    number, and prints ``evtx_template_readable_view`` of the record's root
    node.

    Raises:
        RuntimeError: if ``get_record`` returns ``None`` for the given number.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Print the structure of an EVTX record's template.")
    parser.add_argument("evtx", type=str, help="Path to the Windows EVTX file")
    parser.add_argument("record", type=int, help="Record number")
    args = parser.parse_args()

    with Evtx(args.evtx) as evtx:
        r = evtx.get_record(args.record)
        # Fail with the same clear message the sibling record tools use,
        # instead of an AttributeError on None below.
        if r is None:
            raise RuntimeError("Cannot find the record specified.")
        # Function-call form works on both Python 2 and Python 3.
        print(evtx_template_readable_view(r.root()))
예제 #10
0
def main():
    """Process every ``Windows PowerShell.evtx`` log under the Windows log directory.

    Walks ``C:\\Windows\\System32\\winevt\\Logs`` recursively.  Each file named
    exactly ``Windows PowerShell.evtx`` is opened with ``Evtx`` and passed
    through ``Magic``, ``OutPut`` and ``get_all_file_zip`` (all defined
    elsewhere), in that order.
    """
    for root, subdirs, files in os.walk("C:\\Windows\\System32\\winevt\\Logs"):
        for file_names in files:
            if file_names != "Windows PowerShell.evtx":
                continue
            # Join with the directory os.walk is currently visiting, so a
            # match inside a subdirectory opens the file that was found
            # rather than a path rebuilt from the top-level directory.
            log_path = os.path.abspath(os.path.join(root, file_names))
            with Evtx(log_path) as evtx:
                script_data = Magic(evtx)
                z = OutPut(script_data)
                get_all_file_zip(z)
def xml_records(filename):
    """
    Iterate over the records of an EVTX file as ``(result, error)`` pairs.

    When the error slot is ``None`` the result is the value returned by
    ``to_lxml``.  When the error slot holds an exception
    (``etree.XMLSyntaxError``), parsing failed and the result is the XML
    string that could not be parsed.

    @type filename str
    @rtype: generator of (etree.Element or str), (None or Exception)
    """
    with Evtx(filename) as log:
        view = evtx_file_xml_view(log.get_file_header())
        for raw_xml, _record in view:
            try:
                element = to_lxml(raw_xml)
                yield element, None
            except etree.XMLSyntaxError as err:
                yield raw_xml, err
예제 #12
0
def extract_xml(evtx_file):
    """
        Walk an evtx file and yield the XML of each event as a
        ``(result, error)`` pair.

        A parse failure does not stop the iteration: the unparsed XML is
        yielded together with the exception that was raised.
    """
    with Evtx(evtx_file) as log:
        header = log.get_file_header()
        for raw_xml, _record in evtx_file_xml_view(header):
            try:
                # Parsed cleanly: hand back the lxml object, no error.
                tree = to_lxml(raw_xml)
                yield tree, None
            except etree.XMLSyntaxError as err:
                # Could not parse: hand back the raw XML and the exception.
                yield raw_xml, err
예제 #13
0
def main():
    """Command-line entry point: parse Task Scheduler EVTX logs under a path.

    Walks the directory given with ``-p/--path`` recursively.  Every file
    whose lower-cased name contains ``taskscheduler`` and ends in ``.evtx``
    is opened with ``Evtx`` and passed to ``get_Tasks``; the result goes to
    ``outputResults``.  If no path is given, a usage hint is printed and the
    function returns without scanning anything.
    """
    parser = argparse.ArgumentParser(description='Parse Task Scheduler EVTX logs.')
    parser.add_argument('-p', '--path', help='Path to Task Scheduler EVTX file(s).')
    args = parser.parse_args()
    if not args.path:
        print("You need to specify a path to your EVTX file(s).")
        # Stop here: continuing used to fail on the unbound `input_path`.
        return
    input_path = args.path

    for root, subdirs, files in os.walk(input_path):
        for file_names in files:
            if re.search(r".*taskscheduler.*\.evtx$", file_names.lower()):
                # Join with the directory being visited: plain string
                # concatenation dropped the path separator and ignored
                # subdirectories.
                log_path = os.path.abspath(os.path.join(root, file_names))
                with Evtx(log_path) as evtx:
                    task_results = get_Tasks(evtx)
                    outputResults(task_results)
예제 #14
0
def main():
    """Command-line entry point: write one record's raw data to stdout.

    Takes two positional arguments, the path to the EVTX file and the number
    of the record to extract.

    Raises:
        RuntimeError: if ``get_record`` returns ``None`` for the given number.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Write the raw data for a EVTX record to STDOUT")
    parser.add_argument("evtx", type=str, help="Path to the Windows EVTX file")
    parser.add_argument("record",
                        type=int,
                        help="The record number of the record to extract")
    args = parser.parse_args()

    with Evtx(args.evtx) as evtx:
        record = evtx.get_record(args.record)
        if record is None:
            raise RuntimeError("Cannot find the record specified.")
        # NOTE(review): assumes record.data() returns raw bytes -- confirm
        # against the Evtx library.  On Python 3 the text-mode sys.stdout
        # rejects bytes, so write through its binary buffer when there is
        # one; Python 2's sys.stdout has no `buffer` and accepts bytes.
        raw_out = getattr(sys.stdout, "buffer", sys.stdout)
        raw_out.write(record.data())
예제 #15
0
def main():
    """Command-line entry point: extract PS1 scripts from PowerShell EVTX logs.

    Walks the directory given with ``-p/--path`` recursively.  Every file
    whose lower-cased name ends in ``powershell.evtx`` is opened with
    ``Evtx`` and passed to ``get_Scripts``; the result goes to
    ``processData``.  If no path is given, a usage hint is printed and the
    function returns without scanning anything.
    """
    parser = argparse.ArgumentParser(
        description='Parse PS1 scripts out of Windows Powershell EVTX Logs.')
    parser.add_argument('-p', '--path', help='Path to EVTX.')
    args = parser.parse_args()
    if not args.path:
        print("You need to specify a path to your Windows Powershell EVTX file.")
        # Stop here: continuing used to fail on the unbound `input_path`.
        return
    input_path = args.path

    for root, subdirs, files in os.walk(input_path):
        for file_names in files:
            if re.search(r".*powershell\.evtx$", file_names.lower()):
                # Join with the directory being visited: plain string
                # concatenation dropped the path separator and ignored
                # subdirectories.
                log_path = os.path.abspath(os.path.join(root, file_names))
                with Evtx(log_path) as evtx:
                    script_data = get_Scripts(evtx)
                    processData(script_data)
예제 #16
0
def total_records(args):
    """Return the number of records in ``args.evtx``, summed over its chunks.

    Each chunk contributes ``1 + last - first`` of its logged record numbers.
    Unless ``args.quiet`` is set, a progress message and the final total are
    written to stderr.

    Args:
        args: object with ``evtx`` (path to the log) and ``quiet`` (bool)
            attributes.

    Returns:
        The total record count.

    On ``IOError`` an error message is written to stderr and the process
    exits with status 2.
    """
    try:
        if not args.quiet:
            sys.stderr.write("Calculating total number of records... ")
        total = 0
        with Evtx(args.evtx) as evtx:
            fh = evtx.get_file_header()
            for chunk in fh.chunks():
                first_record = chunk.log_first_record_number()
                last_record = chunk.log_last_record_number()
                total += 1 + last_record - first_record
            if not args.quiet:
                sys.stderr.write(str(total) + "\n")
            return total
    except IOError:
        # Report the path actually opened; the name `filename` used here
        # before is not defined in this function and raised NameError.
        sys.stderr.write("Error: Cannot open file {}\n".format(args.evtx))
        sys.exit(2)
예제 #17
0
def main():
    """Command-line entry point for the PowerShell script block extractor.

    Parses the command line, opens the given EVTX file and passes the
    entries with event ID 4104, together with the chosen options, to
    ``process_entries``.
    """
    import argparse
    cli = argparse.ArgumentParser(
        description=(
            "Parse PowerShell script block log entries (EID 4104) out of the Microsoft-Windows-PowerShell%4Operational.evtx event log. By default, reconstructs all multi-message blocks."
        ))
    cli.add_argument(
        "evtx",
        type=str,
        help=(
            "Path to the Microsoft-Windows-PowerShell%%4Operational.evtx event log file to parse"
        ))
    cli.add_argument(
        "-m", "--metadata",
        type=str,
        help="Output script block metadata to CSV. Specify output file.")
    cli.add_argument(
        "-s", "--scriptid",
        type=str,
        help="Script block ID to parse. Use with -f or -o")
    cli.add_argument(
        "-f", "--file",
        type=str,
        help="Write blocks to a single file. Specify output file.")
    cli.add_argument(
        "-o", "--output",
        type=str,
        help="Output directory for script blocks.")
    cli.add_argument(
        "-a", "--all",
        action='store_true',
        help="Output all blocks.")
    options = cli.parse_args()

    with Evtx(options.evtx) as evtx:
        entries = get_entries_with_eids(evtx, set([4104]))
        process_entries(entries, options.scriptid, options.all,
                        options.output, options.file, options.metadata)
예제 #18
0
    def getRecord(self, filename, record_number):
        """Return the XML view of one record from a log in the log directory.

        The directory is ``log_dir_linux`` when ``os.name`` is ``'posix'``
        and ``log_dir_windows`` otherwise; *filename* is joined onto it.

        Raises:
            RuntimeError: if ``get_record`` returns ``None`` for
                *record_number*.
        """
        base_dir = log_dir_linux if os.name == 'posix' else log_dir_windows
        log_path = os.path.join(base_dir, filename)
        with Evtx(log_path) as evtx:
            found = evtx.get_record(record_number)
            if found is None:
                raise RuntimeError("Cannot find the record specified.")
            return evtx_record_xml_view(found)
예제 #19
0
def main():
    """Command-line entry point: print record numbers matching an event ID.

    Takes two positional arguments, the path to the EVTX file and the event
    ID to look for.  Records whose XML fails to parse are skipped; for every
    other record whose ``System/EventID`` equals the requested ID, the
    record number is printed.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Print the record numbers of EVTX log entries "
                    "that match the given EID.")
    parser.add_argument("evtx", type=str,
                        help="Path to the Windows EVTX file")
    parser.add_argument("eid", type=int,
                        help="The EID of records to extract")
    args = parser.parse_args()

    with Evtx(args.evtx) as evtx:
        for xml, record in evtx_file_xml_view(evtx.get_file_header()):
            try:
                node = to_lxml(xml)
            except XMLSyntaxError:
                continue
            if args.eid != int(get_child(get_child(node, "System"), "EventID").text):
                continue
            # Function-call form works on both Python 2 and Python 3; the
            # print statement is a SyntaxError on Python 3.
            print(record.record_num())
예제 #20
0
def main():
    """Command-line entry point: pretty print a record's binary structure.

    Takes the path to the EVTX file and a record number, plus the optional
    ``--suppress_values`` flag.  Prints, in order: a hex dump of the record
    data, the record's absolute offset, the ``describe_root`` output for the
    record's root node, and the record's XML view.

    Raises:
        RuntimeError: if ``get_record`` returns ``None`` for the given number.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Pretty print the binary structure of an EVTX record.")
    parser.add_argument("evtx", type=str, help="Path to the Windows EVTX file")
    parser.add_argument("record", type=int, help="Record number")
    parser.add_argument("--suppress_values",
                        action="store_true",
                        help="Do not print the values of substitutions.")
    args = parser.parse_args()

    with Evtx(args.evtx) as evtx:
        # Look the record up once instead of once per use.
        record = evtx.get_record(args.record)
        if record is None:
            raise RuntimeError("Cannot find the record specified.")

        # Use the function-call form throughout: it works on Python 2 and 3,
        # whereas the print statement is a SyntaxError on Python 3.
        print(hex_dump(record.data()))

        print("record(absolute_offset=%s)" % (record.offset()))
        print(describe_root(record,
                            record.root(),
                            suppress_values=args.suppress_values))
        print(evtx_record_xml_view(record))
예제 #21
0
def parse_log(path, codes):
    """Collect process creation/termination details from an EVTX log.

    Args:
        path: path handed to ``Evtx``.
        codes: container of event IDs to keep; membership is tested with
            ``in``.

    Returns:
        A dict keyed by the text of each kept record's ``EventRecordID``.
        Every value holds ``"TimeStamp"`` (the ``SystemTime`` attribute of
        ``TimeCreated``).  For event 4688 it also holds ``"Type"`` =
        ``"Creation"`` plus whichever of the mapped ``EventData`` fields are
        present; for 4689, ``"Type"`` = ``"Termination"`` plus the process
        id and name.  Process ids are parsed as base-16 integers.
    """
    def from_hex(text):
        return int(text, 16)

    def as_is(text):
        return text

    # event id -> (type label, {EventData field name -> (output key, converter)})
    layouts = {
        4688: ("Creation", {  # process creation
            "NewProcessId": ("NewProcessId", from_hex),
            "NewProcessName": ("NewProcessName", as_is),
            "ProcessId": ("ProcessId", from_hex),
            "ParentProcessName": ("ProcessName", as_is),
            "TargetUserName": ("TargetUserName", as_is),
        }),
        4689: ("Termination", {  # process termination
            "ProcessId": ("ProcessId", from_hex),
            "ProcessName": ("ProcessName", as_is),
        }),
    }

    results = dict()
    with Evtx(path) as log:
        for record in log.records():
            node = record.lxml()
            system = get_child(node, "System")
            event_id = int(get_child(system, "EventID").text)
            if event_id not in codes:
                continue

            timestamp = get_child(system, "TimeCreated").attrib["SystemTime"]
            record_id = get_child(system, "EventRecordID").text
            event_data = get_child(node, "EventData").getchildren()
            data = {"TimeStamp": timestamp}

            if event_id in layouts:
                type_label, fields = layouts[event_id]
                data["Type"] = type_label
                for item in event_data:
                    field_name = item.get("Name")
                    if field_name in fields:
                        out_key, convert = fields[field_name]
                        data[out_key] = convert(item.text)

            results[record_id] = data

    return results
예제 #22
0
def parse_evtx(evtx_list):
    event_set = pd.DataFrame(index=[], columns=["eventid", "ipaddress", "username", "logintype", "status", "authname", "date"])
    count_set = pd.DataFrame(index=[], columns=["dates", "eventid", "username"])
    ml_frame = pd.DataFrame(index=[], columns=["date", "user", "host", "id"])
    username_set = []
    domain_set = []
    admins = []
    domains = []
    ntmlauth = []
    deletelog = []
    policylist = []
    addusers = {}
    delusers = {}
    addgroups = {}
    removegroups = {}
    sids = {}
    hosts = {}
    dcsync_count = {}
    dcsync = {}
    dcshadow_check = []
    dcshadow = {}
    count = 0
    record_sum = 0
    starttime = None
    endtime = None

    if args.timezone:
        try:
            datetime.timezone(datetime.timedelta(hours=args.timezone))
            tzone = args.timezone
            print("[*] Time zone is %s." % args.timezone)
        except:
            sys.exit("[!] Can't load time zone '%s'." % args.timezone)
    else:
        tzone = 0

    if args.fromdate:
        try:
            fdatetime = datetime.datetime.strptime(args.fromdate, "%Y%m%d%H%M%S")
            print("[*] Parse the EVTX from %s." % fdatetime.strftime("%Y-%m-%d %H:%M:%S"))
        except:
            sys.exit("[!] From date does not match format '%Y%m%d%H%M%S'.")

    if args.todate:
        try:
            tdatetime = datetime.datetime.strptime(args.todate, "%Y%m%d%H%M%S")
            print("[*] Parse the EVTX from %s." % tdatetime.strftime("%Y-%m-%d %H:%M:%S"))
        except:
            sys.exit("[!] To date does not match format '%Y%m%d%H%M%S'.")

    for evtx_file in evtx_list:
        if args.evtx:
            with open(evtx_file, "rb") as fb:
                fb_data = fb.read(8)
                if fb_data != EVTX_HEADER:
                    sys.exit("[!] This file is not EVTX format {0}.".format(evtx_file))

            chunk = -2
            with Evtx(evtx_file) as evtx:
                fh = evtx.get_file_header()
                try:
                    while True:
                        last_chunk = list(evtx.chunks())[chunk]
                        last_record = last_chunk.file_last_record_number()
                        chunk -= 1
                        if last_record > 0:
                            record_sum = record_sum + last_record
                            break
                except:
                    record_sum = record_sum + fh.next_record_number()

        if args.xmls:
            with open(evtx_file, "r") as fb:
                fb_header = fb.read(6)
                if "<?xml" not in fb_header:
                    sys.exit("[!] This file is not XML format {0}.".format(evtx_file))
                for line in fb:
                    record_sum += line.count("<System>")

    print("[*] Last record number is %i." % record_sum)

    # Parse Event log
    print("[*] Start parsing the EVTX file.")

    for evtx_file in evtx_list:
        print("[*] Parse the EVTX file %s." % evtx_file)

        for node, err in xml_records(evtx_file):
            if err is not None:
                continue
            count += 1
            eventid = int(node.xpath("/Event/System/EventID")[0].text)

            if not count % 100:
                sys.stdout.write("\r[*] Now loading %i records." % count)
                sys.stdout.flush()

            if eventid in EVENT_ID:
                logtime = node.xpath("/Event/System/TimeCreated")[0].get("SystemTime")
                try:
                    etime = datetime.datetime.strptime(logtime.split(".")[0], "%Y-%m-%d %H:%M:%S") + datetime.timedelta(hours=tzone)
                except:
                    etime = datetime.datetime.strptime(logtime.split(".")[0], "%Y-%m-%dT%H:%M:%S") + datetime.timedelta(hours=tzone)
                stime = datetime.datetime(*etime.timetuple()[:4])
                if args.fromdate or args.todate:
                    if args.fromdate and fdatetime > etime:
                        continue
                    if args.todate and tdatetime < etime:
                        endtime = stime
                        break

                if starttime is None:
                    starttime = stime
                elif starttime > etime:
                    starttime = stime

                if endtime is None:
                    endtime = stime
                elif endtime < etime:
                    endtime = stime

                event_data = node.xpath("/Event/EventData/Data")
                logintype = "-"
                username = "******"
                domain = "-"
                ipaddress = "-"
                hostname = "-"
                status = "-"
                sid = "-"
                authname = "-"

                ###
                # Detect admin users
                #  EventID 4672: Special privileges assigned to new logon
                ###
                if eventid == 4672:
                    for data in event_data:
                        if data.get("Name") in "SubjectUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                    if username not in admins and username != "-":
                        admins.append(username)
                ###
                # Detect removed user account and added user account.
                #  EventID 4720: A user account was created
                #  EventID 4726: A user account was deleted
                ###
                elif eventid in [4720, 4726]:
                    for data in event_data:
                        if data.get("Name") in "TargetUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                    if eventid == 4720:
                        addusers[username] = etime.strftime("%Y-%m-%d %H:%M:%S")
                    else:
                        delusers[username] = etime.strftime("%Y-%m-%d %H:%M:%S")
                ###
                # Detect Audit Policy Change
                #  EventID 4719: System audit policy was changed
                ###
                elif eventid == 4719:
                    for data in event_data:
                        if data.get("Name") in "SubjectUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                        if data.get("Name") in "CategoryId" and data.text is not None and re.search(r"\A%%\d{4}\Z", data.text):
                            category = data.text
                        if data.get("Name") in "SubcategoryGuid" and data.text is not None and re.search(r"\A{[\w\-]*}\Z", data.text):
                            guid = data.text
                    policylist.append([etime.strftime("%Y-%m-%d %H:%M:%S"), username, category, guid.lower(), int(stime.strftime("%s"))])
                ###
                # Detect added users from specific group
                #  EventID 4728: A member was added to a security-enabled global group
                #  EventID 4732: A member was added to a security-enabled local group
                #  EventID 4756: A member was added to a security-enabled universal group
                ###
                elif eventid in [4728, 4732, 4756]:
                    for data in event_data:
                        if data.get("Name") in "TargetUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            groupname = data.text
                        elif data.get("Name") in "MemberSid" and data.text not in "-" and data.text is not None and re.search(r"\AS-[0-9\-]*\Z", data.text):
                            usid = data.text
                    addgroups[usid] = "AddGroup: " + groupname + "(" + etime.strftime("%Y-%m-%d %H:%M:%S") + ") "
                ###
                # Detect removed users from specific group
                #  EventID 4729: A member was removed from a security-enabled global group
                #  EventID 4733: A member was removed from a security-enabled local group
                #  EventID 4757: A member was removed from a security-enabled universal group
                ###
                elif eventid in [4729, 4733, 4757]:
                    for data in event_data:
                        if data.get("Name") in "TargetUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            groupname = data.text
                        elif data.get("Name") in "MemberSid" and data.text not in "-" and data.text is not None and re.search(r"\AS-[0-9\-]*\Z", data.text):
                            usid = data.text
                    removegroups[usid] = "RemoveGroup: " + groupname + "(" + etime.strftime("%Y-%m-%d %H:%M:%S") + ") "
                ###
                # Detect DCSync
                #  EventID 4662: An operation was performed on an object
                ###
                elif eventid == 4662:
                    for data in event_data:
                        if data.get("Name") in "SubjectUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                        dcsync_count[username] = dcsync_count.get(username, 0) + 1
                        if dcsync_count[username] == 3:
                            dcsync[username] = etime.strftime("%Y-%m-%d %H:%M:%S")
                            dcsync_count[username] = 0
                ###
                # Detect DCShadow
                #  EventID 5137: A directory service object was created
                #  EventID 5141: A directory service object was deleted
                ###
                elif eventid in [5137, 5141]:
                    for data in event_data:
                        if data.get("Name") in "SubjectUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                        if etime.strftime("%Y-%m-%d %H:%M:%S") in dcshadow_check:
                            dcshadow[username] = etime.strftime("%Y-%m-%d %H:%M:%S")
                        else:
                            dcshadow_check.append(etime.strftime("%Y-%m-%d %H:%M:%S"))
                ###
                # Parse logon logs
                #  EventID 4624: An account was successfully logged on
                #  EventID 4625: An account failed to log on
                #  EventID 4768: A Kerberos authentication ticket (TGT) was requested
                #  EventID 4769: A Kerberos service ticket was requested
                #  EventID 4776: The domain controller attempted to validate the credentials for an account
                ###
                else:
                    for data in event_data:
                        # parse IP Address
                        if data.get("Name") in ["IpAddress", "Workstation"] and data.text is not None and (not re.search(HCHECK, data.text) or re.search(IPv4_PATTERN, data.text) or re.search(r"\A::ffff:\d+\.\d+\.\d+\.\d+\Z", data.text) or re.search(IPv6_PATTERN, data.text)):
                            ipaddress = data.text.split("@")[0]
                            ipaddress = ipaddress.lower().replace("::ffff:", "")
                            ipaddress = ipaddress.replace("\\", "")
                        # Parse hostname
                        if data.get("Name") == "WorkstationName" and data.text is not None and (not re.search(HCHECK, data.text) or re.search(IPv4_PATTERN, data.text) or re.search(r"\A::ffff:\d+\.\d+\.\d+\.\d+\Z", data.text) or re.search(IPv6_PATTERN, data.text)):
                            hostname = data.text.split("@")[0]
                            hostname = hostname.lower().replace("::ffff:", "")
                            hostname = hostname.replace("\\", "")
                        # Parse username
                        if data.get("Name") in "TargetUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                        # Parse targeted domain name
                        if data.get("Name") in "TargetDomainName" and data.text is not None and not re.search(HCHECK, data.text):
                            domain = data.text
                        # parse trageted user SID
                        if data.get("Name") in ["TargetUserSid", "TargetSid"] and data.text is not None and re.search(r"\AS-[0-9\-]*\Z", data.text):
                            sid = data.text
                        # parse lonon type
                        if data.get("Name") in "LogonType" and re.search(r"\A\d{1,2}\Z", data.text):
                            logintype = int(data.text)
                        # parse status
                        if data.get("Name") in "Status" and re.search(r"\A0x\w{8}\Z", data.text):
                            status = data.text
                        # parse Authentication package name
                        if data.get("Name") in "AuthenticationPackageName" and re.search(r"\A\w*\Z", data.text):
                            authname = data.text

                    if username != "-" and username != "anonymous logon" and ipaddress != "::1" and ipaddress != "127.0.0.1" and (ipaddress != "-" or hostname != "-"):
                        # generate pandas series
                        if ipaddress != "-":
                            event_series = pd.Series([eventid, ipaddress, username, logintype, status, authname, int(stime.strftime("%s"))], index=event_set.columns)
                            ml_series = pd.Series([etime.strftime("%Y-%m-%d %H:%M:%S"), username, ipaddress, eventid],  index=ml_frame.columns)
                        else:
                            event_series = pd.Series([eventid, hostname, username, logintype, status, authname, int(stime.strftime("%s"))], index=event_set.columns)
                            ml_series = pd.Series([etime.strftime("%Y-%m-%d %H:%M:%S"), username, hostname, eventid],  index=ml_frame.columns)
                        # append pandas series to dataframe
                        event_set = event_set.append(event_series, ignore_index=True)
                        ml_frame = ml_frame.append(ml_series, ignore_index=True)
                        # print("%s,%i,%s,%s,%s,%s" % (eventid, ipaddress, username, comment, logintype))
                        count_series = pd.Series([stime.strftime("%Y-%m-%d %H:%M:%S"), eventid, username], index=count_set.columns)
                        count_set = count_set.append(count_series, ignore_index=True)
                        # print("%s,%s" % (stime.strftime("%Y-%m-%d %H:%M:%S"), username))

                        if domain != "-":
                            domain_set.append([username, domain])

                        if username not in username_set:
                            username_set.append(username)

                        if domain not in domains and domain != "-":
                            domains.append(domain)

                        if sid != "-":
                            sids[username] = sid

                        if hostname != "-" and ipaddress != "-":
                            hosts[ipaddress] = hostname

                        if authname in "NTML" and authname not in ntmlauth:
                            ntmlauth.append(username)
            ###
            # Detect the audit log deletion
            # EventID 1102: The audit log was cleared
            ###
            if eventid == 1102:
                logtime = node.xpath("/Event/System/TimeCreated")[0].get("SystemTime")
                try:
                    etime = datetime.datetime.strptime(logtime.split(".")[0], "%Y-%m-%d %H:%M:%S") + datetime.timedelta(hours=tzone)
                except:
                    etime = datetime.datetime.strptime(logtime.split(".")[0], "%Y-%m-%dT%H:%M:%S") + datetime.timedelta(hours=tzone)
                deletelog.append(etime.strftime("%Y-%m-%d %H:%M:%S"))

                namespace = "http://manifests.microsoft.com/win/2004/08/windows/eventlog"
                user_data = node.xpath("/Event/UserData/ns:LogFileCleared/ns:SubjectUserName", namespaces={"ns": namespace})
                domain_data = node.xpath("/Event/UserData/ns:LogFileCleared/ns:SubjectDomainName", namespaces={"ns": namespace})

                if user_data[0].text is not None:
                    username = user_data[0].text.split("@")[0]
                    if username[-1:] not in "$":
                        deletelog.append(username.lower())
                    else:
                        deletelog.append("-")
                else:
                    deletelog.append("-")

                if domain_data[0].text is not None:
                    deletelog.append(domain_data[0].text)
                else:
                    deletelog.append("-")

    print("\n[*] Load finished.")
    print("[*] Total Event log is %i." % count)

    if not username_set:
        sys.exit("[!] This event log did not include logs to be visualized. Please check the details of the event log.")

    tohours = int((endtime - starttime).total_seconds() / 3600)

    if hosts:
        event_set = event_set.replace(hosts)
    event_set_bydate = event_set
    event_set_bydate["count"] = event_set_bydate.groupby(["eventid", "ipaddress", "username", "logintype", "status", "authname", "date"])["eventid"].transform("count")
    event_set_bydate = event_set_bydate.drop_duplicates()
    event_set = event_set.drop("date", axis=1)
    event_set["count"] = event_set.groupby(["eventid", "ipaddress", "username", "logintype", "status", "authname"])["eventid"].transform("count")
    event_set = event_set.drop_duplicates()
    count_set["count"] = count_set.groupby(["dates", "eventid", "username"])["dates"].transform("count")
    count_set = count_set.drop_duplicates()
    domain_set_uniq = list(map(list, set(map(tuple, domain_set))))

    # Learning event logs using Hidden Markov Model
    if hosts:
        ml_frame = ml_frame.replace(hosts)
    ml_frame = ml_frame.sort_values(by="date")
    if args.learn:
        print("[*] Learning event logs using Hidden Markov Model.")
        learnhmm(ml_frame, username_set, datetime.datetime(*starttime.timetuple()[:3]))

    # Calculate ChangeFinder
    print("[*] Calculate ChangeFinder.")
    timelines, detects, detect_cf = adetection(count_set, username_set, starttime, tohours)

    # Calculate Hidden Markov Model
    print("[*] Calculate Hidden Markov Model.")
    detect_hmm = decodehmm(ml_frame, username_set, datetime.datetime(*starttime.timetuple()[:3]))

    # Calculate PageRank
    print("[*] Calculate PageRank.")
    ranks = pagerank(event_set, admins, detect_hmm, detect_cf, ntmlauth)

    # Create node
    print("[*] Creating a graph data.")

    try:
        graph_http = "http://" + NEO4J_USER + ":" + NEO4J_PASSWORD + "@" + NEO4J_SERVER + ":" + NEO4J_PORT + "/db/data/"
        GRAPH = Graph(graph_http)
    except:
        sys.exit("[!] Can't connect Neo4j Database.")

    tx = GRAPH.begin()
    hosts_inv = {v: k for k, v in hosts.items()}
    for ipaddress in event_set["ipaddress"].drop_duplicates():
        if ipaddress in hosts_inv:
            hostname = hosts_inv[ipaddress]
        else:
            hostname = ipaddress
        # add the IPAddress node to neo4j
        tx.append(statement_ip, {"IP": ipaddress, "rank": ranks[ipaddress], "hostname": hostname})

    i = 0
    for username in username_set:
        if username in sids:
            sid = sids[username]
        else:
            sid = "-"
        if username in admins:
            rights = "system"
        else:
            rights = "user"
        ustatus = ""
        if username in addusers:
            ustatus += "Created(" + addusers[username] + ") "
        if username in delusers:
            ustatus += "Deleted(" + delusers[username] + ") "
        if sid in addgroups:
            ustatus += addgroups[sid]
        if sid in removegroups:
            ustatus += removegroups[sid]
        if username in dcsync:
            ustatus += "DCSync(" + dcsync[username] + ") "
        if username in dcshadow:
            ustatus += "DCShadow(" + dcshadow[username] + ") "
        if not ustatus:
            ustatus = "-"

        # add the username node to neo4j
        tx.append(statement_user, {"user": username[:-1], "rank": ranks[username], "rights": rights, "sid": sid, "status": ustatus,
                                   "counts": ",".join(map(str, timelines[i*6])), "counts4624": ",".join(map(str, timelines[i*6+1])),
                                   "counts4625": ",".join(map(str, timelines[i*6+2])), "counts4768": ",".join(map(str, timelines[i*6+3])),
                                   "counts4769": ",".join(map(str, timelines[i*6+4])), "counts4776": ",".join(map(str, timelines[i*6+5])),
                                   "detect": ",".join(map(str, detects[i]))})
        i += 1

    for domain in domains:
        # add the domain node to neo4j
        tx.append(statement_domain, {"domain": domain})

    for _, events in event_set_bydate.iterrows():
        # add the (username)-(event)-(ip) link to neo4j
        tx.append(statement_r, {"user": events["username"][:-1], "IP": events["ipaddress"], "id": events["eventid"], "logintype": events["logintype"],
                                "status": events["status"], "count": events["count"], "authname": events["authname"], "date": events["date"]})

    for username, domain in domain_set_uniq:
        # add (username)-()-(domain) link to neo4j
        tx.append(statement_dr, {"user": username[:-1], "domain": domain})

    # add the date node to neo4j
    tx.append(statement_date, {"Daterange": "Daterange", "start": datetime.datetime(*starttime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S"),
                               "end": datetime.datetime(*endtime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S")})

    if len(deletelog):
        # add the delete flag node to neo4j
        tx.append(statement_del, {"deletetime": deletelog[0], "user": deletelog[1], "domain": deletelog[2]})

    if len(policylist):
        id = 0
        for policy in policylist:
            if policy[2] in CATEGORY_IDs:
                category = CATEGORY_IDs[policy[2]]
            else:
                category = policy[2]
            if policy[3] in AUDITING_CONSTANTS:
                sub = AUDITING_CONSTANTS[policy[3]]
            else:
                sub = policy[3]
            username = policy[1]
            # add the policy id node to neo4j
            tx.append(statement_pl, {"id": id, "changetime": policy[0], "category": category, "sub": sub})
            # add (username)-(policy)-(id) link to neo4j
            tx.append(statement_pr, {"user": username[:-1], "id": id, "date": policy[4]})
            id += 1

    tx.process()
    tx.commit()
    print("[*] Creation of a graph data finished.")
예제 #23
0
import lxml.etree as et
from Evtx.Evtx import Evtx
from eventcodes import EVENT_CODES

# Walk every record of WindowsEvents.evtx and print a short summary
# (process id and computer name) for each one; records whose EventID is
# "10016" are skipped.
with Evtx('WindowsEvents.evtx') as log:
    for i, record in enumerate(log.records(), start=1):
        root = record.lxml()   # lxml Element for this record
        nsmap = root.nsmap     # namespace map declared on the record root

        # To inspect a whole record while debugging, dump it with:
        #   print(et.tostring(root, pretty_print=True).decode())

        event_id = root.findtext('.//EventID', namespaces=nsmap)
        if event_id == "10016":
            continue

        print(f"Record {i}:")

        # The Execution element carries the ProcessID attribute; fall back
        # to 'N/A' when the record has no such element.
        execution = root.find('.//Execution', namespaces=nsmap)
        process_id = 'N/A' if execution is None else execution.get('ProcessID')
        print("\tProcess ID:", process_id)

        computer = root.findtext('.//Computer', namespaces=nsmap)
        print("\tComputer:", computer)

        record_id = root.findtext('.//EventRecordID', namespaces=nsmap)
예제 #24
0
def _event_timestamp(node, tzone):
    """Return the record's TimeCreated/@SystemTime as a naive datetime.

    The fractional part of the timestamp is dropped and the result is
    shifted by *tzone* hours.  Both "YYYY-MM-DD HH:MM:SS" and the
    "T"-separated form are accepted.
    """
    logtime = node.xpath("/Event/System/TimeCreated")[0].get("SystemTime")
    stamp = logtime.split(".")[0]
    try:
        parsed = datetime.datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S")
    except ValueError:
        parsed = datetime.datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S")
    return parsed + datetime.timedelta(hours=tzone)


def _account_name(raw):
    """Normalize an account name read from an event's Data element.

    Everything after the first "@" is dropped.  Names that are empty or end
    in "$" collapse to the "-" placeholder; any other name is lower-cased
    and suffixed with "@" (the suffix is stripped again with ``[:-1]`` when
    the name is written to the graph).
    """
    name = raw.split("@")[0]
    if not name or name.endswith("$"):
        return "-"
    return name.lower() + "@"


def parse_evtx(evtx_list):
    """Parse event logs, rank accounts/hosts and load the result into Neo4j.

    Reads its options from the module-level ``args`` (``timezone``,
    ``fromdate``, ``todate``, ``evtx``, ``xmls``, ``learn``).  Every record
    returned by ``xml_records`` whose EventID is in ``EVENT_ID`` is folded
    into per-user / per-host tables; EventID 1102 is additionally recorded as
    a log deletion.  The tables are then handed to ``learnhmm`` (optional),
    ``adetection``, ``decodehmm`` and ``pagerank`` and finally written to the
    Neo4j graph in a single transaction.

    Args:
        evtx_list: iterable of paths to the EVTX (or XML) files to parse.

    Raises:
        SystemExit: on an invalid time zone or date option, on a file whose
            header does not match the selected format, when no record
            produced a user to visualize, or when the Neo4j connection
            cannot be created.
    """
    time_format = "%Y-%m-%d %H:%M:%S"
    event_columns = [
        "eventid", "ipaddress", "username", "logintype", "status", "authname"
    ]
    count_columns = ["dates", "eventid", "username"]
    ml_columns = ["date", "user", "host", "id"]
    # Rows are collected in plain lists and turned into DataFrames once;
    # DataFrame.append was quadratic and no longer exists in pandas >= 2.0.
    event_rows = []
    count_rows = []
    ml_rows = []
    username_set = []
    domain_set = []
    admins = []
    domains = []
    ntmlauth = []
    deletelog = []
    policylist = []
    addusers = {}
    delusers = {}
    addgroups = {}
    removegroups = {}
    sids = {}
    hosts = {}
    dcsync_count = {}
    dcsync = {}
    dcshadow_check = []
    dcshadow = {}
    count = 0
    record_sum = 0
    starttime = None
    endtime = None

    if args.timezone:
        try:
            # Only used to validate the offset; the value itself is applied
            # as a plain timedelta later on.
            datetime.timezone(datetime.timedelta(hours=args.timezone))
        except Exception:
            sys.exit("[!] Can't load time zone '%s'." % args.timezone)
        tzone = args.timezone
        print("[*] Time zone is %s." % args.timezone)
    else:
        tzone = 0

    fdatetime = None
    if args.fromdate:
        try:
            fdatetime = datetime.datetime.strptime(args.fromdate,
                                                   "%Y%m%d%H%M%S")
        except (TypeError, ValueError):
            sys.exit("[!] From date does not match format '%Y%m%d%H%M%S'.")
        print("[*] Parse the EVTX from %s." % fdatetime.strftime(time_format))

    tdatetime = None
    if args.todate:
        try:
            tdatetime = datetime.datetime.strptime(args.todate,
                                                   "%Y%m%d%H%M%S")
        except (TypeError, ValueError):
            sys.exit("[!] To date does not match format '%Y%m%d%H%M%S'.")
        print("[*] Parse the EVTX to %s." % tdatetime.strftime(time_format))

    # First pass: validate each file's header and estimate the record total.
    for evtx_file in evtx_list:
        if args.evtx:
            with open(evtx_file, "rb") as fb:
                if fb.read()[0:8] != EVTX_HEADER:
                    sys.exit("[!] This file is not EVTX format {0}.".format(
                        evtx_file))

            with Evtx(evtx_file) as evtx:
                fh = evtx.get_file_header()
                try:
                    chunks = list(evtx.chunks())
                    # Walk backwards, starting at the second-to-last chunk,
                    # until a chunk reports a positive last record number.
                    index = -2
                    while True:
                        last_record = chunks[index].file_last_record_number()
                        index -= 1
                        if last_record > 0:
                            record_sum += last_record
                            break
                except Exception:
                    # Ran out of chunks (or a chunk could not be read): fall
                    # back to the counter stored in the file header.
                    record_sum += fh.next_record_number()

        if args.xmls:
            with open(evtx_file, "r") as fb:
                fb_data = fb.read()
                if "<?xml" not in fb_data[0:6]:
                    sys.exit("[!] This file is not XML format {0}.".format(
                        evtx_file))
                record_sum += fb_data.count("<System>")
                del fb_data

    print("[*] Last record number is %i." % record_sum)

    # Parse Event log
    print("[*] Start parsing the EVTX file.")

    for evtx_file in evtx_list:
        print("[*] Parse the EVTX file %s." % evtx_file)

        for node, err in xml_records(evtx_file):
            if err is not None:
                continue
            count += 1
            eventid = int(node.xpath("/Event/System/EventID")[0].text)

            if not count % 100:
                sys.stdout.write("\r[*] Now loading %i records." % count)
                sys.stdout.flush()

            if eventid in EVENT_ID:
                etime = _event_timestamp(node, tzone)
                # stime is etime truncated to the hour.
                stime = datetime.datetime(*etime.timetuple()[:4])
                if fdatetime is not None and fdatetime > etime:
                    continue
                if tdatetime is not None and tdatetime < etime:
                    endtime = stime
                    break

                if starttime is None or starttime > etime:
                    starttime = stime
                if endtime is None or endtime < etime:
                    endtime = stime

                timestamp = etime.strftime(time_format)
                # (Name attribute, text) of every EventData/Data element, in
                # document order; later elements override earlier ones.
                fields = [(data.get("Name"), data.text)
                          for data in node.xpath("/Event/EventData/Data")]
                logintype = "-"
                username = "-"
                domain = "-"
                ipaddress = "-"
                hostname = "-"
                status = "-"
                sid = "-"
                authname = "-"

                if eventid == 4672:
                    for name, text in fields:
                        if name == "SubjectUserName" and text is not None:
                            username = _account_name(text)
                    if username != "-" and username not in admins:
                        admins.append(username)
                elif eventid in (4720, 4726):
                    for name, text in fields:
                        if name == "TargetUserName" and text is not None:
                            username = _account_name(text)
                    if eventid == 4720:
                        addusers[username] = timestamp
                    else:
                        delusers[username] = timestamp
                elif eventid == 4719:
                    # Reset per event so values never leak from an earlier
                    # record when a field is missing.
                    category = "-"
                    guid = "-"
                    for name, text in fields:
                        if text is None:
                            continue
                        if name == "SubjectUserName":
                            username = _account_name(text)
                        elif name == "CategoryId":
                            category = text
                        elif name == "SubcategoryGuid":
                            guid = text
                    policylist.append(
                        [timestamp, username, category, guid.lower()])
                elif eventid in (4728, 4732, 4756, 4729, 4733, 4757):
                    groupname = "-"
                    usid = None
                    for name, text in fields:
                        if text is None:
                            continue
                        if name == "TargetUserName":
                            groupname = text
                        elif name == "MemberSid" and text not in ("", "-"):
                            usid = text
                    # Without a member SID there is nothing to key the
                    # change on, so the event is ignored.
                    if usid is not None:
                        if eventid in (4728, 4732, 4756):
                            addgroups[usid] = ("AddGroup: " + groupname +
                                               "(" + timestamp + ") ")
                        else:
                            removegroups[usid] = ("RemoveGroup: " +
                                                  groupname + "(" +
                                                  timestamp + ") ")
                elif eventid == 4662:
                    # NOTE(review): the counter advances once per Data
                    # element, not once per event -- behavior kept as found;
                    # confirm this is the intended DCSync heuristic.
                    for name, text in fields:
                        if name == "SubjectUserName" and text is not None:
                            username = _account_name(text)
                        dcsync_count[username] = dcsync_count.get(username,
                                                                  0) + 1
                        if dcsync_count[username] == 3:
                            dcsync[username] = timestamp
                            dcsync_count[username] = 0
                elif eventid in (5137, 5141):
                    # NOTE(review): the timestamp check also runs once per
                    # Data element -- behavior kept as found; confirm intent.
                    for name, text in fields:
                        if name == "SubjectUserName" and text is not None:
                            username = _account_name(text)
                        if timestamp in dcshadow_check:
                            dcshadow[username] = timestamp
                        else:
                            dcshadow_check.append(timestamp)
                else:
                    for name, text in fields:
                        if text is None:
                            # Keep the "-" placeholder for empty elements.
                            continue
                        if name in ("IpAddress", "Workstation"):
                            ipaddress = text.split("@")[0].lower()
                            ipaddress = ipaddress.replace("::ffff:", "")
                            ipaddress = ipaddress.replace("\\", "")
                        elif name == "WorkstationName":
                            hostname = text.split("@")[0].lower()
                            hostname = hostname.replace("::ffff:", "")
                            hostname = hostname.replace("\\", "")
                        elif name == "TargetUserName":
                            username = _account_name(text)
                        elif name == "TargetDomainName":
                            domain = text
                        elif name in ("TargetUserSid", "TargetSid"):
                            if text.startswith("S-1"):
                                sid = text
                        elif name == "LogonType":
                            logintype = int(text)
                        elif name == "Status":
                            status = text
                        elif name == "AuthenticationPackageName":
                            authname = text

                    if (username != "-" and ipaddress != "::1"
                            and ipaddress != "127.0.0.1"
                            and (ipaddress != "-" or hostname != "-")):
                        # Prefer the IP address as the source; fall back to
                        # the workstation name when no address was logged.
                        source = ipaddress if ipaddress != "-" else hostname
                        event_rows.append([
                            eventid, source, username, logintype, status,
                            authname
                        ])
                        ml_rows.append([timestamp, username, source, eventid])
                        count_rows.append(
                            [stime.strftime(time_format), eventid, username])

                        if domain != "-":
                            domain_set.append([username, domain])
                            if domain not in domains:
                                domains.append(domain)

                        if username not in username_set:
                            username_set.append(username)

                        if sid != "-":
                            sids[username] = sid

                        if hostname != "-" and ipaddress != "-":
                            hosts[hostname] = ipaddress

                        if authname == "NTLM" and username not in ntmlauth:
                            ntmlauth.append(username)

            if eventid == 1102:
                etime = _event_timestamp(node, tzone)
                deletelog.append(etime.strftime(time_format))

                namespace = "http://manifests.microsoft.com/win/2004/08/windows/eventlog"
                user_data = node.xpath(
                    "/Event/UserData/ns:LogFileCleared/ns:SubjectUserName",
                    namespaces={"ns": namespace})
                domain_data = node.xpath(
                    "/Event/UserData/ns:LogFileCleared/ns:SubjectDomainName",
                    namespaces={"ns": namespace})

                # Missing elements are treated like empty ones so deletelog
                # always grows by exactly three entries per 1102 event.
                user_text = user_data[0].text if user_data else None
                cleared_by = "" if user_text is None else user_text.split("@")[0]
                if cleared_by and not cleared_by.endswith("$"):
                    deletelog.append(cleared_by.lower())
                else:
                    deletelog.append("-")

                domain_text = domain_data[0].text if domain_data else None
                deletelog.append("-" if domain_text is None else domain_text)

    print("\n[*] Load finished.")
    print("[*] Total Event log is %i." % count)

    if not username_set:
        sys.exit(
            "[!] This event log did not include logs to be visualized. Please check the details of the event log."
        )

    tohours = int((endtime - starttime).total_seconds() / 3600)

    event_set = pd.DataFrame(event_rows, columns=event_columns, dtype=object)
    count_set = pd.DataFrame(count_rows, columns=count_columns, dtype=object)
    ml_frame = pd.DataFrame(ml_rows, columns=ml_columns, dtype=object)

    if hosts:
        # Collapse workstation names onto their IP address.
        event_set = event_set.replace(hosts)
    event_set["count"] = event_set.groupby(event_columns)["eventid"].transform(
        "count")
    event_set = event_set.drop_duplicates()
    count_set["count"] = count_set.groupby(count_columns)["dates"].transform(
        "count")
    count_set = count_set.drop_duplicates()
    domain_set_uniq = list(map(list, set(map(tuple, domain_set))))

    # Learning event logs using Hidden Markov Model
    if hosts:
        ml_frame = ml_frame.replace(hosts)
    ml_frame = ml_frame.sort_values(by="date")
    start_day = datetime.datetime(*starttime.timetuple()[:3])
    if args.learn:
        print("[*] Learning event logs using Hidden Markov Model.")
        learnhmm(ml_frame, username_set, start_day)

    # Calculate ChangeFinder
    print("[*] Calculate ChangeFinder.")
    timelines, detects, detect_cf = adetection(count_set, username_set,
                                               starttime, tohours)

    # Calculate Hidden Markov Model
    print("[*] Calculate Hidden Markov Model.")
    detect_hmm = decodehmm(ml_frame, username_set, start_day)

    # Calculate PageRank
    print("[*] Calculate PageRank.")
    ranks = pagerank(event_set, admins, detect_hmm, detect_cf, ntmlauth)

    # Create node
    print("[*] Creating a graph data.")

    try:
        graph_http = "http://" + NEO4J_USER + ":" + NEO4J_PASSWORD + "@" + NEO4J_SERVER + ":" + NEO4J_PORT + "/db/data/"
        graph = Graph(graph_http)
    except Exception:
        sys.exit("[!] Can't connect Neo4j Database.")

    tx = graph.begin()
    hosts_inv = {v: k for k, v in hosts.items()}
    for ipaddress in event_set["ipaddress"].drop_duplicates():
        tx.append(statement_ip, {
            "IP": ipaddress,
            "rank": ranks[ipaddress],
            "hostname": hosts_inv.get(ipaddress, ipaddress)
        })

    for i, username in enumerate(username_set):
        sid = sids.get(username, "-")
        rights = "system" if username in admins else "user"
        ustatus = ""
        if username in addusers:
            ustatus += "Created(" + addusers[username] + ") "
        if username in delusers:
            ustatus += "Deleted(" + delusers[username] + ") "
        if sid in addgroups:
            ustatus += addgroups[sid]
        if sid in removegroups:
            ustatus += removegroups[sid]
        if username in dcsync:
            ustatus += "DCSync(" + dcsync[username] + ") "
        if username in dcshadow:
            ustatus += "DCShadow(" + dcshadow[username] + ") "
        if not ustatus:
            ustatus = "-"
        # timelines holds six consecutive series per user, indexed in the
        # same order as username_set.
        tx.append(
            statement_user, {
                "user": username[:-1],
                "rank": ranks[username],
                "rights": rights,
                "sid": sid,
                "status": ustatus,
                "counts": ",".join(map(str, timelines[i * 6])),
                "counts4624": ",".join(map(str, timelines[i * 6 + 1])),
                "counts4625": ",".join(map(str, timelines[i * 6 + 2])),
                "counts4768": ",".join(map(str, timelines[i * 6 + 3])),
                "counts4769": ",".join(map(str, timelines[i * 6 + 4])),
                "counts4776": ",".join(map(str, timelines[i * 6 + 5])),
                "detect": ",".join(map(str, detects[i]))
            })

    for domain in domains:
        tx.append(statement_domain, {"domain": domain})

    for _, events in event_set.iterrows():
        tx.append(
            statement_r, {
                "user": events["username"][:-1],
                "IP": events["ipaddress"],
                "id": events["eventid"],
                "logintype": events["logintype"],
                "status": events["status"],
                "count": events["count"],
                "authname": events["authname"]
            })

    for username, domain in domain_set_uniq:
        tx.append(statement_dr, {"user": username[:-1], "domain": domain})

    tx.append(
        statement_date, {
            "Daterange": "Daterange",
            "start": datetime.datetime(
                *starttime.timetuple()[:4]).strftime(time_format),
            "end": datetime.datetime(
                *endtime.timetuple()[:4]).strftime(time_format)
        })

    if deletelog:
        # Only the first recorded log deletion is written to the graph.
        tx.append(
            statement_del, {
                "deletetime": deletelog[0],
                "user": deletelog[1],
                "domain": deletelog[2]
            })

    for policy_id, policy in enumerate(policylist):
        changetime, username, raw_category, raw_sub = policy
        tx.append(
            statement_pl, {
                "id": policy_id,
                "changetime": changetime,
                "category": CATEGORY_IDs.get(raw_category, raw_category),
                "sub": AUDITING_CONSTANTS.get(raw_sub, raw_sub)
            })
        tx.append(statement_pr, {"user": username[:-1], "id": policy_id})

    tx.process()
    tx.commit()
    print("[*] Creation of a graph data finished.")
예제 #25
0
def parse_evtx(evtx_list, GRAPH):
    """Parse logon events from the given files and load them into GRAPH.

    The function reads the module-level ``args`` namespace (``timezone``,
    ``fromdate``, ``todate``, ``evtx``, ``xmls``), walks every record
    produced by ``xml_records`` for each file, keeps the events whose ID is
    in ``EVENT_ID``, aggregates them, runs ``pagerank`` and ``adetection``
    on the aggregates and finally appends the resulting nodes and
    relationships to a transaction opened with ``GRAPH.begin()``.

    Event 4672 is only used to collect the accounts reported as
    ``SubjectUserName``; those accounts get ``rights="system"`` on their
    user node.

    Args:
        evtx_list: Iterable of paths of the log files to parse.
        GRAPH: Graph database handle; must provide ``begin()`` returning a
            transaction with ``append``, ``process`` and ``commit``.

    Raises:
        SystemExit: On an invalid time zone or date option, on a file with
            the wrong format, or when no matching event was found (the
            time range of the data set cannot be computed in that case).
    """
    # Placeholder for a missing field.  It has to be the very same value the
    # filters below compare against, otherwise "missing" entries leak into
    # the result set as if they were real accounts.
    NO_VALUE = "-"
    event_columns = ["eventid", "ipaddress", "username", "logintype", "status", "authname"]
    count_columns = ["dates", "eventid", "username"]
    # Rows are collected in plain lists and turned into DataFrames once;
    # growing a DataFrame row by row is quadratic and DataFrame.append no
    # longer exists in pandas 2.x.
    event_rows = []
    count_rows = []
    username_set = []
    domain_set = []
    admins = []
    domains = []
    sids = {}
    hosts = {}
    count = 0
    record_sum = 0
    starttime = None
    endtime = None
    fdatetime = None
    tdatetime = None

    def normalize_account(raw):
        """Return the lower-cased account name, or NO_VALUE for empty names and names ending in "$"."""
        account = raw.split("@")[0]
        if account and not account.endswith("$"):
            return account.lower()
        return NO_VALUE

    def clean_address(raw):
        """Strip the "@..." suffix, the "::ffff:" prefix and backslashes from an address/host value."""
        return raw.split("@")[0].lower().replace("::ffff:", "").replace("\\", "")

    if args.timezone:
        try:
            datetime.timezone(datetime.timedelta(hours=args.timezone))
        except (TypeError, ValueError, OverflowError):
            sys.exit("[!] Can't load time zone '%s'." % args.timezone)
        tzone = args.timezone
        print("[*] Time zone is %s." % args.timezone)
    else:
        tzone = 0

    if args.fromdate:
        try:
            fdatetime = datetime.datetime.strptime(args.fromdate, "%Y%m%d%H%M%S")
        except (TypeError, ValueError):
            sys.exit("[!] From date does not match format '%Y%m%d%H%M%S'.")
        print("[*] Parse the EVTX from %s." % fdatetime.strftime("%Y-%m-%d %H:%M:%S"))

    if args.todate:
        try:
            tdatetime = datetime.datetime.strptime(args.todate, "%Y%m%d%H%M%S")
        except (TypeError, ValueError):
            sys.exit("[!] To date does not match format '%Y%m%d%H%M%S'.")
        print("[*] Parse the EVTX to %s." % tdatetime.strftime("%Y-%m-%d %H:%M:%S"))

    # First pass: validate the file format and estimate the record total.
    for evtx_file in evtx_list:
        if args.evtx:
            with open(evtx_file, "rb") as fb:
                # Only the 8 byte signature is needed, not the whole file.
                if fb.read(8) != EVTX_HEADER:
                    sys.exit("[!] This file is not EVTX format {0}.".format(evtx_file))

            with Evtx(evtx_file) as evtx:
                fh = evtx.get_file_header()
                try:
                    chunks = list(evtx.chunks())
                    # Walk backwards from the second to last chunk until one
                    # reports a positive last record number.
                    chunk = -2
                    while True:
                        last_record = chunks[chunk].file_last_record_number()
                        chunk -= 1
                        if last_record > 0:
                            record_sum = record_sum + last_record
                            break
                except Exception:
                    # Best effort: fall back to the file header when the
                    # chunks cannot be read or none of them qualifies.
                    record_sum = record_sum + fh.next_record_number()

        if args.xmls:
            with open(evtx_file, "r") as fb:
                fb_data = fb.read()
                if "<?xml" not in fb_data[0:6]:
                    sys.exit("[!] This file is not XML format {0}.".format(evtx_file))
                record_sum += fb_data.count("<System>")
                del fb_data

    print("[*] Last record number is %i." % record_sum)

    # Parse Event log
    print("[*] Start parsing the EVTX file.")

    for evtx_file in evtx_list:
        print("[*] Parse the EVTX file %s." % evtx_file)

        for node, err in xml_records(evtx_file):
            if err is not None:
                continue
            count += 1
            eventid = int(node.xpath("/Event/System/EventID")[0].text)

            if not count % 100:
                sys.stdout.write("\r[*] Now loading %i records." % count)
                sys.stdout.flush()

            if eventid not in EVENT_ID:
                continue

            logtime = node.xpath("/Event/System/TimeCreated")[0].get("SystemTime")
            timestamp = logtime.split(".")[0]
            # The timestamp separates date and time with either " " or "T".
            try:
                etime = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")
            except ValueError:
                etime = datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S")
            etime += datetime.timedelta(hours=tzone)
            # Event time truncated to the full hour.
            stime = datetime.datetime(*etime.timetuple()[:4])

            if fdatetime is not None and fdatetime > etime:
                continue
            if tdatetime is not None and tdatetime < etime:
                # Stop reading this file at the first event past the limit.
                endtime = stime
                break

            if starttime is None or starttime > etime:
                starttime = stime
            if endtime is None or endtime < etime:
                endtime = stime

            event_data = node.xpath("/Event/EventData/Data")
            logintype = NO_VALUE
            username = NO_VALUE
            domain = NO_VALUE
            ipaddress = NO_VALUE
            hostname = NO_VALUE
            status = NO_VALUE
            sid = NO_VALUE
            authname = NO_VALUE

            if eventid == 4672:
                for data in event_data:
                    if data.get("Name") == "SubjectUserName" and data.text is not None:
                        username = normalize_account(data.text)

                if username not in admins and username != NO_VALUE:
                    admins.append(username)
                continue

            for data in event_data:
                name = data.get("Name")
                text = data.text
                if text is None:
                    # An empty element carries no information; keep the placeholder.
                    continue

                # Field names are compared for equality: a substring test
                # would also accept e.g. "UserName" for "TargetUserName"
                # and fails outright when the Name attribute is absent.
                if name in ("IpAddress", "Workstation"):
                    ipaddress = clean_address(text)
                elif name == "WorkstationName":
                    hostname = clean_address(text)
                elif name == "TargetUserName":
                    username = normalize_account(text)
                elif name == "TargetDomainName":
                    domain = text
                elif name in ("TargetUserSid", "TargetSid"):
                    if text.startswith("S-1"):
                        sid = text
                elif name == "LogonType":
                    logintype = int(text)
                elif name == "Status":
                    status = text
                elif name == "AuthenticationPackageName":
                    authname = text

            # Skip events without a usable account or with a missing/loopback source.
            if username == NO_VALUE or ipaddress in (NO_VALUE, "::1", "127.0.0.1"):
                continue

            event_rows.append([eventid, ipaddress, username, logintype, status, authname])
            count_rows.append([stime.strftime("%Y-%m-%d %H:%M:%S"), eventid, username])

            if domain != NO_VALUE:
                domain_set.append([username, domain])
                if domain not in domains:
                    domains.append(domain)

            if username not in username_set:
                username_set.append(username)

            if sid != NO_VALUE:
                sids[username] = sid

            if hostname not in ("", NO_VALUE):
                hosts[hostname] = ipaddress

    if starttime is None or endtime is None:
        # Without a single matching event there is no time range to analyse.
        sys.exit("[!] No matching event was found in the given files.")

    tohours = int((endtime - starttime).total_seconds() / 3600)

    print("\n[*] Load finished.")
    print("[*] Total Event log is %i." % count)
    event_set = pd.DataFrame(event_rows, columns=event_columns)
    count_set = pd.DataFrame(count_rows, columns=count_columns)
    # Replace every value equal to a known host name by the address recorded for it.
    event_set = event_set.replace(hosts)
    event_set["count"] = event_set.groupby(event_columns)["eventid"].transform("count")
    event_set = event_set.drop_duplicates()
    count_set["count"] = count_set.groupby(count_columns)["dates"].transform("count")
    count_set = count_set.drop_duplicates()
    domain_set_uniq = list(map(list, set(map(tuple, domain_set))))

    # Calculate PageRank
    print("[*] Calculate PageRank.")
    ranks = pagerank(event_set)

    # Calculate ChangeFinder
    print("[*] Calculate ChangeFinder.")
    timelines, detects = adetection(count_set, username_set, ranks, starttime, tohours)

    # Create node
    print("[*] Creating a graph data.")
    tx = GRAPH.begin()
    hosts_inv = {v: k for k, v in hosts.items()}
    for ipaddress in event_set["ipaddress"].drop_duplicates():
        tx.append(statement_ip, {
            "IP": ipaddress,
            "rank": ranks[ipaddress],
            "hostname": hosts_inv.get(ipaddress, ipaddress),
        })

    # timelines holds six consecutive series per user, in username_set order.
    for i, username in enumerate(username_set):
        tx.append(statement_user, {
            "user": username,
            "rank": ranks[username],
            "rights": "system" if username in admins else "user",
            "sid": sids.get(username, NO_VALUE),
            "counts": ",".join(map(str, timelines[i * 6])),
            "counts4624": ",".join(map(str, timelines[i * 6 + 1])),
            "counts4625": ",".join(map(str, timelines[i * 6 + 2])),
            "counts4768": ",".join(map(str, timelines[i * 6 + 3])),
            "counts4769": ",".join(map(str, timelines[i * 6 + 4])),
            "counts4776": ",".join(map(str, timelines[i * 6 + 5])),
            "detect": ",".join(map(str, detects[i])),
        })

    for domain in domains:
        tx.append(statement_domain, {"domain": domain})

    for _, events in event_set.iterrows():
        tx.append(statement_r, {
            "user": events["username"],
            "IP": events["ipaddress"],
            "id": events["eventid"],
            "logintype": events["logintype"],
            "status": events["status"],
            "count": events["count"],
            "authname": events["authname"],
        })

    for username, domain in domain_set_uniq:
        tx.append(statement_dr, {"user": username, "domain": domain})

    tx.append(statement_date, {
        "Daterange": "Daterange",
        "start": datetime.datetime(*starttime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S"),
        "end": datetime.datetime(*endtime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S"),
    })

    tx.process()
    tx.commit()
    print("[*] Creation of a graph data finished.")
예제 #26
0
def parse_evtx(evtx_list, GRAPH):
    """Parse logon events from EVTX files and load them into GRAPH.

    The function reads the module-level ``args`` namespace (``timezone``,
    ``fromdate``, ``todate``), walks every record produced by
    ``xml_records`` for each file, keeps the events whose ID is in
    ``EVENT_ID``, counts identical events, runs ``pagerank`` and
    ``adetection`` on the counts and finally appends the resulting nodes
    and relationships to a transaction opened with ``GRAPH.begin()``.

    Event 4672 is additionally used to collect the accounts reported as
    ``SubjectUserName``; those accounts get ``rights="system"`` on their
    user node.

    Args:
        evtx_list: Iterable of paths of the EVTX files to parse.
        GRAPH: Graph database handle; must provide ``begin()`` returning a
            transaction with ``append``, ``process`` and ``commit``.

    Raises:
        SystemExit: On an invalid time zone or date option, on a file that
            does not start with ``EVTX_HEADER``, or when no matching event
            was found (the time range cannot be computed in that case).
    """
    # Placeholder for a missing field.  It has to be the very same value the
    # filters below compare against, otherwise "missing" entries leak into
    # the result set as if they were real accounts.
    NO_VALUE = "-"
    event_set = []
    count_set = []
    ipaddress_set = []
    username_set = []
    admins = []
    sids = {}
    count = 0
    record_sum = 0
    starttime = None
    endtime = None
    fdatetime = None
    tdatetime = None

    def normalize_account(raw):
        """Return the lower-cased account name, or NO_VALUE for empty names and names ending in "$"."""
        account = raw.split("@")[0]
        if account and not account.endswith("$"):
            return account.lower()
        return NO_VALUE

    def event_time(system_node):
        """Return the record's SystemTime shifted by the configured hour offset."""
        logtime = get_child(system_node, "TimeCreated").get("SystemTime")
        parsed = datetime.datetime.strptime(logtime.split(".")[0], "%Y-%m-%d %H:%M:%S")
        return parsed + datetime.timedelta(hours=tzone)

    if args.timezone:
        try:
            datetime.timezone(datetime.timedelta(hours=args.timezone))
        except (TypeError, ValueError, OverflowError):
            sys.exit("[!] Can't load time zone '%s'." % args.timezone)
        tzone = args.timezone
        print("[*] Time zone is %s." % args.timezone)
    else:
        tzone = 0

    if args.fromdate:
        try:
            fdatetime = datetime.datetime.strptime(args.fromdate, "%Y%m%d%H%M%S")
        except (TypeError, ValueError):
            sys.exit("[!] From date does not match format '%Y%m%d%H%M%S'.")
        print("[*] Parse the EVTX from %s." %
              fdatetime.strftime("%Y-%m-%d %H:%M:%S"))

    if args.todate:
        try:
            tdatetime = datetime.datetime.strptime(args.todate, "%Y%m%d%H%M%S")
        except (TypeError, ValueError):
            sys.exit("[!] To date does not match format '%Y%m%d%H%M%S'.")
        print("[*] Parse the EVTX to %s." %
              tdatetime.strftime("%Y-%m-%d %H:%M:%S"))

    # First pass: validate the file format and estimate the record total.
    for evtx_file in evtx_list:
        with open(evtx_file, "rb") as fb:
            # Only the 8 byte signature is needed, not the whole file.
            if fb.read(8) != EVTX_HEADER:
                sys.exit("[!] This file is not EVTX format {0}.".format(evtx_file))

        with Evtx(evtx_file) as evtx:
            fh = evtx.get_file_header()
            try:
                chunks = list(evtx.chunks())
                # Walk backwards from the second to last chunk until one
                # reports a positive last record number.
                chunk = -2
                while True:
                    last_record = chunks[chunk].file_last_record_number()
                    chunk -= 1
                    if last_record > 0:
                        record_sum = record_sum + last_record
                        break
            except IndexError:
                # Ran out of chunks (small or empty log): use the record
                # number kept in the file header instead of crashing.
                record_sum = record_sum + fh.next_record_number()

    print("[*] Last recode number is %i." % record_sum)

    # Parse Event log
    print("[*] Start parsing the EVTX file.")

    for evtx_file in evtx_list:
        print("[*] Parse the EVTX file %s." % evtx_file)

        for node, err in xml_records(evtx_file):
            count += 1
            if not count % 100:
                sys.stdout.write("\r[*] Now loading %i records." % count)
                sys.stdout.flush()

            if err is not None:
                continue

            sysev = get_child(node, "System")
            eventid = int(get_child(sysev, "EventID").text)

            if eventid in EVENT_ID:
                etime = event_time(sysev)
                # Event time truncated to the full hour.
                stime = datetime.datetime(*etime.timetuple()[:4])
                if fdatetime is not None and fdatetime > etime:
                    continue
                if tdatetime is not None and tdatetime < etime:
                    # Stop reading this file at the first event past the limit.
                    endtime = stime
                    break

                if starttime is None or starttime > etime:
                    starttime = stime
                if endtime is None or endtime < etime:
                    endtime = stime

                event_data = get_child(node, "EventData")
                logintype = NO_VALUE
                username = NO_VALUE
                ipaddress = NO_VALUE
                status = NO_VALUE
                sid = NO_VALUE
                for data in event_data:
                    name = data.get("Name")
                    text = data.text
                    if text is None:
                        # An empty element carries no information; keep the placeholder.
                        continue

                    # Field names are compared for equality: a substring
                    # test would also accept e.g. "UserName" for
                    # "TargetUserName" and fails outright when the Name
                    # attribute is absent.
                    if name in ("IpAddress", "Workstation"):
                        ipaddress = text.split("@")[0].lower()
                        ipaddress = ipaddress.replace("::ffff:", "").replace("\\", "")
                    elif name == "TargetUserName":
                        username = normalize_account(text)
                    elif name in ("TargetUserSid", "TargetSid"):
                        if text.startswith("S-1"):
                            sid = text
                    elif name == "LogonType":
                        logintype = int(text)
                    elif name == "Status":
                        status = text

                # Keep only events with a usable account and a non-loopback source.
                if username != NO_VALUE and ipaddress not in (NO_VALUE, "::1", "127.0.0.1"):
                    event_set.append([eventid, ipaddress, username, logintype, status])
                    count_set.append([stime.strftime("%Y-%m-%d %H:%M:%S"), eventid, username])

                    if ipaddress not in ipaddress_set:
                        ipaddress_set.append(ipaddress)

                    if username not in username_set:
                        username_set.append(username)

                    if sid != NO_VALUE:
                        sids[username] = sid

            if eventid == 4672:
                if fdatetime is not None or tdatetime is not None:
                    etime = event_time(sysev)
                    if fdatetime is not None and fdatetime > etime:
                        continue
                    if tdatetime is not None and tdatetime < etime:
                        break

                event_data = get_child(node, "EventData")
                username = NO_VALUE
                for data in event_data:
                    if data.get("Name") == "SubjectUserName" and data.text is not None:
                        username = normalize_account(data.text)

                if username not in admins and username != NO_VALUE:
                    admins.append(username)

    if starttime is None or endtime is None:
        # Without a single matching event there is no time range to analyse.
        sys.exit("[!] No matching event was found in the given files.")

    tohours = int((endtime - starttime).total_seconds() / 3600)

    print("\n[*] Load finished.")
    print("[*] Total Event log is %i." % count)
    # Sorting makes identical rows adjacent so groupby can count them.
    event_set.sort()
    event_set_uniq = [(key, len(list(group)))
                      for key, group in itertools.groupby(event_set)]
    count_set.sort()
    count_set_uniq = [(key, len(list(group)))
                      for key, group in itertools.groupby(count_set)]

    # Calculate PageRank
    print("[*] Calculate PageRank.")
    ranks = pagerank(event_set_uniq)

    # Calculate ChangeFinder
    print("[*] Calculate ChangeFinder.")
    timelines, detects = adetection(count_set_uniq, username_set, ranks,
                                    starttime, tohours)

    # Create node
    print("[*] Creating a graph data.")
    tx = GRAPH.begin()
    for ipaddress in ipaddress_set:
        tx.append(statement_ip, {"IP": ipaddress, "rank": ranks[ipaddress]})

    # timelines holds six consecutive series per user, in username_set order.
    for i, username in enumerate(username_set):
        tx.append(
            statement_user, {
                "user": username,
                "rank": ranks[username],
                "rights": "system" if username in admins else "user",
                "sid": sids.get(username, NO_VALUE),
                "counts": ",".join(map(str, timelines[i * 6])),
                "counts4624": ",".join(map(str, timelines[i * 6 + 1])),
                "counts4625": ",".join(map(str, timelines[i * 6 + 2])),
                "counts4768": ",".join(map(str, timelines[i * 6 + 3])),
                "counts4769": ",".join(map(str, timelines[i * 6 + 4])),
                "counts4776": ",".join(map(str, timelines[i * 6 + 5])),
                "detect": ",".join(map(str, detects[i])),
            })

    # Each entry is ([eventid, ipaddress, username, logintype, status], occurrences).
    for events, occurrences in event_set_uniq:
        tx.append(
            statement_r, {
                "user": events[2],
                "IP": events[1],
                "id": events[0],
                "logintype": events[3],
                "status": events[4],
                "count": occurrences,
            })

    tx.append(
        statement_date, {
            "Daterange": "Daterange",
            "start": datetime.datetime(
                *starttime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S"),
            "end": datetime.datetime(
                *endtime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S"),
        })

    tx.process()
    tx.commit()
    print("[*] Creation of a graph data finished.")