def main():
    """Extract a single EVTX record and pretty-print it as XML.

    Exits via RuntimeError if the requested record number does not exist;
    if pretty-printing fails, falls back to printing the raw XML.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="Extract a single EVTX record and pretty print it.")
    parser.add_argument("evtx", type=str,
                        help="Path to the Windows EVTX file")
    parser.add_argument("record", type=int,
                        help="The record number of the record to extract")
    args = parser.parse_args()
    with Evtx(args.evtx) as evtx:
        record = evtx.get_record(args.record)
        if record is None:
            raise RuntimeError("Cannot find the record specified.")
        # Build the XML once; the original recomputed it in the fallback path.
        xml = ("<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?>\n%s"
               % evtx_record_xml_view(record))
        try:
            # print() calls: the original used Python 2 `print expr`
            # statements, which are SyntaxErrors under Python 3.
            print(prettify_xml(xml))
        except ExpatError as e:
            # Pretty-printing failed; show the error and the raw XML instead.
            print("Exception: ")
            print(repr(e))
            print("")
            print("")
            print(xml)
def xml_records(filename):
    """Yield (lxml Element, None) for each record in *filename*, or
    (raw XML string, XMLSyntaxError) when a record fails to parse,
    so callers can inspect or skip malformed records.
    """
    with Evtx(filename) as evtx:
        for xml, record in evtx_file_xml_view(evtx.get_file_header()):
            try:
                yield to_lxml(xml), None
            except etree.XMLSyntaxError as e:
                # Fix: the error branch previously yielded a 3-tuple ending
                # in an undefined name `fh` (NameError at first parse error);
                # yield the same (payload, error) pair as the success path.
                yield xml, e
def ParseEvtx(files):
    """Parse the EVTX file *files* and append every record's XML to
    ..\\RESULTS\\EventLog.txt (wrapped in an <Events> element), printing
    a progress bar to stdout.
    """
    # First pass: count records so the progress bar has a denominator.
    with Evtx(files) as evtx:
        total = sum(1 for i in evtx.records())
    # Fixes: the output file was opened but never closed (leak); the EVTX
    # file was opened in text mode ('r') before mmap — binary mode ('rb')
    # is required for correct byte offsets.
    with open("..\\RESULTS\\EventLog.txt", "a+") as writefile, \
            open(files, 'rb') as f:
        with contextlib.closing(
                mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)) as buf:
            fh = FileHeader(buf, 0x0)
            writefile.write(
                "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?>")
            writefile.write("<Events>")
            count = 0
            for xml, record in evtx_file_xml_view(fh):
                count += 1
                writefile.write(ascii(xml))
                # Render a 55-character progress bar in place.
                bar_len = 55
                filled_len = int(round(bar_len * count / float(total)))
                percents = round(100.0 * count / float(total), 1)
                bar = '=' * filled_len + '-' * (bar_len - filled_len)
                sys.stdout.write('[%s] %s%s %s/%s \r' %
                                 (bar, percents, '%', count, total))
                sys.stdout.flush()
            writefile.write("</Events>")
    # print() calls: the original bare `print print` is Python 2 only.
    print()
    print()
def xml_records(filename):
    # Yield (parsed-record, error) pairs from an EVTX file.
    #
    # Each yielded tuple is (lxml Element, None) on success, or
    # (raw XML string / None, Exception) when parsing fails, so callers
    # can decide how to handle bad records without the generator dying.
    if OPTIONS.alternate:
        # Alternate backend: the python-evtx Evtx parser.
        with Evtx(filename) as evtx:
            try:
                for xml, record in evtx_file_xml_view(evtx.get_file_header()):
                    try:
                        yield ef.to_lxml(xml), None
                    except etree.XMLSyntaxError as e:
                        # Record-level parse failure: hand back the raw XML.
                        yield xml, e
            except BinaryParser.OverrunBufferException as e:
                # File-level failure: no XML to return, only the exception.
                logging.error("Overrun Buffer Exception!")
                yield None, e
            except BinaryParser.ParseException as e:
                logging.error("Parse Exception!")
                yield None, e
            except Exception as e:  # UnicodeDecodeError, AttributeError
                logging.error(e)
                yield None, e
    else:
        # Default backend: PyEvtxParser; records arrive as dicts with
        # the XML under the 'data' key.
        parser = PyEvtxParser(filename)
        try:
            for record in parser.records():
                try:
                    yield ef.to_lxml(record['data']), None
                except etree.XMLSyntaxError as e:
                    yield record['data'], e
        except Exception as e:  # UnicodeDecodeError, AttributeError, RuntimeError
            logging.error(e)
            yield None, e
def xml_records(filename):
    """Yield (lxml Element, None) per event on success, or
    (raw XML string, XMLSyntaxError) when a record cannot be parsed.

    Handles native EVTX input (args.evtx) and exported XML (args.xmls).
    """
    if args.evtx:
        with Evtx(filename) as evtx:
            for xml, record in evtx_file_xml_view(evtx.get_file_header()):
                try:
                    yield to_lxml(xml), None
                except etree.XMLSyntaxError as e:
                    yield xml, e
    if args.xmls:
        with open(filename, 'r') as handle:
            raw = handle.read()
        # Strip the XML declaration and the <Events> wrapper element.
        stripped = raw.replace(
            "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>",
            "").replace("</Events>", "").replace("<Events>", "")
        del raw
        # Split on each <Event ...> opening tag (single- or double-quoted).
        fragments = re.split(
            "<Event xmlns=[\'\"]http://schemas.microsoft.com/win/2004/08/events/event[\'\"]>",
            stripped)
        del stripped
        for fragment in fragments:
            if not fragment.startswith("<System>"):
                continue
            try:
                yield to_lxml("<Event>" + fragment), None
            except etree.XMLSyntaxError as e:
                yield fragment, e
def main():
    """CLI entry point: build process trees from an EVTX log (or a
    previously serialized .pt file) and run the selected subcommand
    (ts / summary / serialize).
    """
    import argparse
    logging.basicConfig(level=logging.DEBUG)
    logging.getLogger("iso8601.iso8601").setLevel(logging.WARNING)

    parser = argparse.ArgumentParser(
        description="Print the record numbers of EVTX log entries "
                    "that match the given EID.")
    parser.add_argument("input_file", type=str,
                        help="Path to the Windows EVTX file or .pt file")
    subparsers = parser.add_subparsers(dest="cmd")
    ts_parser = subparsers.add_parser("ts")
    ts_parser.add_argument("ts", type=str, default="",
                           help="iso8601 timestamp with which to filter")
    subparsers.add_parser("summary")
    serialize_parser = subparsers.add_parser("serialize")
    serialize_parser.add_argument("pt", type=str, default="state.pt",
                                  help=".pt file to serialize parsed trees")
    args = parser.parse_args()

    analyzer = ProcessTreeAnalyzer()
    if args.input_file.lower().endswith(".pt"):
        # Previously serialized state: load it directly.
        g_logger.info("using serialized file")
        with open(args.input_file, "rb") as f:
            analyzer.deserialize(f)
    else:
        # Raw log: analyze process creation/termination events.
        g_logger.info("using evtx log file")
        with Evtx(args.input_file) as evtx:
            analyzer.analyze(
                get_entries_with_eids(evtx, set([4688, 4689, 1, 5])))

    if args.cmd == "summary":
        summarize_processes(analyzer.get_processes())
    elif args.cmd == "ts":
        if args.ts == "all":
            for root in analyzer.get_roots():
                draw_tree(analyzer, root)
        else:
            g_logger.error("query trees not yet supported")
    elif args.cmd == "serialize":
        if args.pt.lower().endswith(".pt"):
            with open(args.pt, "wb") as f:
                analyzer.serialize(f)
        else:
            g_logger.error("serialize output file must have .pt extension")
    else:
        g_logger.error("unknown command: %s", args.cmd)
def xml_records(filename):
    """Yield (lxml Element, None) for each record of *filename*; on a
    parse failure yield the raw XML string together with the
    XMLSyntaxError so the caller can decide what to do with it.
    """
    with Evtx(filename) as evtx:
        view = evtx_file_xml_view(evtx.get_file_header())
        for xml, record in view:
            try:
                logger.debug("Yielding XML")
                yield to_lxml(xml), None
            except etree.XMLSyntaxError as parse_err:
                logger.error(parse_err)
                yield xml, parse_err
def xml_records(filename):
    """Yield (etree Element, None) per record, or (raw XML, error) on a
    parse failure; exit with status 2 if the file cannot be opened.
    """
    try:
        with Evtx(filename) as evtx:
            for xml, record in evtx_file_xml_view(evtx.get_file_header()):
                try:
                    yield etree.fromstring(xml), None
                except etree.XMLSyntaxError as parse_err:
                    # Malformed record: pass the raw XML and the error on.
                    yield xml, parse_err
    except IOError:
        sys.stderr.write("Error: Cannot open file {}\n".format(filename))
        sys.exit(2)
def main():
    """Print a human-readable view of an EVTX record's template structure."""
    import argparse
    parser = argparse.ArgumentParser(
        description="Print the structure of an EVTX record's template.")
    parser.add_argument("evtx", type=str,
                        help="Path to the Windows EVTX file")
    parser.add_argument("record", type=int, help="Record number")
    args = parser.parse_args()
    with Evtx(args.evtx) as evtx:
        r = evtx.get_record(args.record)
        # print() call: the original Python 2 `print expr` statement is a
        # SyntaxError under Python 3.
        print(evtx_template_readable_view(r.root()))
def main():
    """Locate 'Windows PowerShell.evtx' under the winevt Logs directory,
    extract its script data, and zip the output."""
    for root, subdirs, files in os.walk("C:\\Windows\\System32\\winevt\\Logs"):
        for file_names in files:
            if file_names == "Windows PowerShell.evtx":
                # Fix: join against the directory actually being walked
                # (`root`) instead of re-hardcoding the top-level path,
                # which produced a wrong path for files in subdirectories.
                with Evtx(os.path.join(root, file_names)) as evtx:
                    script_data = Magic(evtx)
                    z = OutPut(script_data)
                    get_all_file_zip(z)
def xml_records(filename):
    """
    Iterate the records of an EVTX file.

    Yields (etree.Element, None) when a record parses cleanly, and
    (xml_string, Exception) when lxml rejects it, so callers can
    inspect or skip malformed records.

    @type filename str
    @rtype: generator of (etree.Element or str), (None or Exception)
    """
    with Evtx(filename) as evtx:
        header = evtx.get_file_header()
        for xml, record in evtx_file_xml_view(header):
            try:
                element = to_lxml(xml)
            except etree.XMLSyntaxError as parse_err:
                yield xml, parse_err
            else:
                yield element, None
def extract_xml(evtx_file):
    """Walk the events of *evtx_file*, yielding (lxml_obj, None) for
    records that parse and (raw_xml, error) for records that do not,
    so the caller always receives whatever value is available."""
    with Evtx(evtx_file) as evtx:
        for xml, record in evtx_file_xml_view(evtx.get_file_header()):
            try:
                parsed = to_lxml(xml)
            except etree.XMLSyntaxError as e:
                # Parse failed - hand back the raw XML plus the exception.
                yield xml, e
            else:
                # Successfully parsed - hand back the lxml object.
                yield parsed, None
def main():
    """Parse Task Scheduler EVTX logs found under --path."""
    parser = argparse.ArgumentParser(description='Parse Task Scheduler EVTX logs.')
    parser.add_argument('-p', '--path',
                        help='Path to Task Scheduler EVTX file(s).')
    args = parser.parse_args()
    if args.path:
        input_path = args.path
    else:
        # Fix: the original printed (py2 syntax) and fell through with
        # input_path unbound, raising NameError at os.walk; report and stop.
        print("You need to specify a path to your EVTX file(s).")
        return
    for root, subdirs, files in os.walk(input_path):
        for file_names in files:
            if re.search(r".*taskscheduler.*\.evtx$", file_names.lower()):
                # os.path.join handles a missing trailing separator and
                # files located in subdirectories of input_path.
                with Evtx(os.path.join(root, file_names)) as evtx:
                    task_results = get_Tasks(evtx)
                    outputResults(task_results)
def main():
    """Write the raw bytes of one EVTX record to STDOUT."""
    import argparse
    parser = argparse.ArgumentParser(
        description="Write the raw data for a EVTX record to STDOUT")
    parser.add_argument("evtx", type=str,
                        help="Path to the Windows EVTX file")
    parser.add_argument("record", type=int,
                        help="The record number of the record to extract")
    args = parser.parse_args()
    with Evtx(args.evtx) as evtx:
        record = evtx.get_record(args.record)
        if record is None:
            raise RuntimeError("Cannot find the record specified.")
        # record.data() is raw bytes; write via the underlying binary
        # buffer — on Python 3, sys.stdout is a text stream and rejects
        # bytes (TypeError).
        sys.stdout.buffer.write(record.data())
def main():
    """Parse PS1 scripts out of Windows PowerShell EVTX logs under --path."""
    parser = argparse.ArgumentParser(
        description='Parse PS1 scripts out of Windows Powershell EVTX Logs.')
    parser.add_argument('-p', '--path', help='Path to EVTX.')
    args = parser.parse_args()
    if args.path:
        input_path = args.path
    else:
        # Fix: the original printed (py2 syntax) and continued with
        # input_path unbound, raising NameError at os.walk; report and stop.
        print("You need to specify a path to your Windows Powershell EVTX file.")
        return
    for root, subdirs, files in os.walk(input_path):
        for file_names in files:
            if re.search(r".*powershell\.evtx$", file_names.lower()):
                # os.path.join copes with a missing trailing separator and
                # with files found in subdirectories of input_path.
                with Evtx(os.path.join(root, file_names)) as evtx:
                    script_data = get_Scripts(evtx)
                    processData(script_data)
def total_records(args):
    """Return the total number of records in args.evtx by summing the
    first/last record-number span of every chunk; exit(2) if the file
    cannot be read."""
    try:
        if not args.quiet:
            sys.stderr.write("Calculating total number of records... ")
        total = 0
        with Evtx(args.evtx) as evtx:
            fh = evtx.get_file_header()
            for chunk in fh.chunks():
                first_record = chunk.log_first_record_number()
                last_record = chunk.log_last_record_number()
                # Inclusive span of record numbers held by this chunk.
                total += 1 + last_record - first_record
        if not args.quiet:
            sys.stderr.write(str(total) + "\n")
        return total
    except IOError:
        # Fix: the handler referenced an undefined name `filename`,
        # so any IOError became a NameError; use args.evtx instead.
        sys.stderr.write("Error: Cannot open file {}\n".format(args.evtx))
        sys.exit(2)
def main():
    """CLI entry point: extract EID 4104 script block events from a
    PowerShell Operational EVTX log and dispatch them according to the
    chosen output options."""
    import argparse
    parser = argparse.ArgumentParser(
        description=
        "Parse PowerShell script block log entries (EID 4104) out of the Microsoft-Windows-PowerShell%4Operational.evtx event log. By default, reconstructs all multi-message blocks."
    )
    parser.add_argument(
        "evtx", type=str,
        help=
        "Path to the Microsoft-Windows-PowerShell%%4Operational.evtx event log file to parse"
    )
    parser.add_argument(
        "-m", "--metadata", type=str,
        help="Output script block metadata to CSV. Specify output file.")
    parser.add_argument(
        "-s", "--scriptid", type=str,
        help="Script block ID to parse. Use with -f or -o")
    parser.add_argument(
        "-f", "--file", type=str,
        help="Write blocks to a single file. Specify output file.")
    parser.add_argument(
        "-o", "--output", type=str,
        help="Output directory for script blocks.")
    parser.add_argument(
        "-a", "--all", action='store_true', help="Output all blocks.")
    args = parser.parse_args()
    with Evtx(args.evtx) as evtx:
        entries = get_entries_with_eids(evtx, set([4104]))
        process_entries(entries, args.scriptid, args.all,
                        args.output, args.file, args.metadata)
def getRecord(self, filename, record_number):
    """Return the XML view of record *record_number* from the EVTX log
    *filename*, resolved against the platform-specific log directory."""
    log_dir = log_dir_linux if os.name == 'posix' else log_dir_windows
    with Evtx(os.path.join(log_dir, filename)) as evtx:
        record = evtx.get_record(record_number)
        if record is None:
            raise RuntimeError("Cannot find the record specified.")
        return evtx_record_xml_view(record)
def main():
    """Print record numbers of EVTX entries whose EventID matches `eid`."""
    import argparse
    parser = argparse.ArgumentParser(
        description="Print the record numbers of EVTX log entries "
                    "that match the given EID.")
    parser.add_argument("evtx", type=str,
                        help="Path to the Windows EVTX file")
    parser.add_argument("eid", type=int,
                        help="The EID of records to extract")
    args = parser.parse_args()
    with Evtx(args.evtx) as evtx:
        for xml, record in evtx_file_xml_view(evtx.get_file_header()):
            try:
                node = to_lxml(xml)
            except XMLSyntaxError:
                # Skip records whose XML cannot be parsed.
                continue
            if args.eid != int(get_child(get_child(node, "System"),
                                         "EventID").text):
                continue
            # print() call: the original Python 2 `print expr` statement
            # is a SyntaxError under Python 3.
            print(record.record_num())
def main():
    """Pretty print the binary structure of an EVTX record: hex dump,
    absolute offset, parsed structure, and rendered XML."""
    import argparse
    parser = argparse.ArgumentParser(
        description="Pretty print the binary structure of an EVTX record.")
    parser.add_argument("evtx", type=str,
                        help="Path to the Windows EVTX file")
    parser.add_argument("record", type=int, help="Record number")
    parser.add_argument("--suppress_values", action="store_true",
                        help="Do not print the values of substitutions.")
    args = parser.parse_args()
    with Evtx(args.evtx) as evtx:
        # Fetch the record once instead of re-parsing it for every output
        # line (the original called evtx.get_record four times).
        record = evtx.get_record(args.record)
        # print() calls: the original Python 2 `print expr` statements are
        # SyntaxErrors under Python 3.
        print(hex_dump(record.data()))
        print("record(absolute_offset=%s)" % (record.offset()))
        print(describe_root(record, record.root(),
                            suppress_values=args.suppress_values))
        print(evtx_record_xml_view(record))
def parse_log(path, codes):
    """Collect process creation/termination events (EventIDs in *codes*)
    from the EVTX file at *path*.

    Returns a dict keyed by EventRecordID; each value holds the
    timestamp plus the relevant EventData fields for EID 4688
    (process creation) or EID 4689 (process termination).
    """
    arr = {}
    with Evtx(path) as log:
        for record in log.records():
            node = record.lxml()
            system = get_child(node, "System")
            event_id = int(get_child(system, "EventID").text)
            if event_id not in codes:
                continue
            time = get_child(system, "TimeCreated").attrib["SystemTime"]
            EventRecordID = get_child(system, "EventRecordID").text
            # Fix: Element.getchildren() is deprecated (and removed from
            # modern ElementTree); iterating/list() is the supported form.
            event_data = list(get_child(node, "EventData"))
            # data = {"EventRecordID": EventRecordID, "TimeStamp": time}
            data = {"TimeStamp": time}
            if event_id == 4688:  # process creation
                data["Type"] = "Creation"
                for item in event_data:
                    name = item.get("Name")
                    if name == "NewProcessId":
                        # PIDs are logged as hex strings (e.g. "0x1a4").
                        data["NewProcessId"] = int(item.text, 16)
                    elif name == "NewProcessName":
                        data["NewProcessName"] = item.text
                    # elif name == "CommandLine":
                    #     data["CommandLine"] = item.text
                    elif name == "ProcessId":
                        data["ProcessId"] = int(item.text, 16)
                    elif name == "ParentProcessName":
                        data["ProcessName"] = item.text
                    elif name == "TargetUserName":
                        data["TargetUserName"] = item.text
            elif event_id == 4689:  # process termination
                data["Type"] = "Termination"
                for item in event_data:
                    name = item.get("Name")
                    if name == "ProcessId":
                        data["ProcessId"] = int(item.text, 16)
                    elif name == "ProcessName":
                        data["ProcessName"] = item.text
            arr[EventRecordID] = data
    return arr
def parse_evtx(evtx_list):
    """Parse a list of EVTX (or exported XML) files, accumulate logon /
    privilege / account-management events into pandas frames and lookup
    tables, run the anomaly-detection passes (ChangeFinder, HMM,
    PageRank), and write the resulting graph into Neo4j.

    Reads the module-level CLI options `args` plus several module
    constants (EVENT_ID, EVTX_HEADER, UCHECK, HCHECK, IPv4/IPv6 patterns,
    Neo4j settings and Cypher statements).
    NOTE(review): bare `except:` clauses below swallow all errors by
    design (best-effort parsing); left as-is.
    """
    event_set = pd.DataFrame(index=[], columns=["eventid", "ipaddress", "username", "logintype", "status", "authname", "date"])
    count_set = pd.DataFrame(index=[], columns=["dates", "eventid", "username"])
    ml_frame = pd.DataFrame(index=[], columns=["date", "user", "host", "id"])
    username_set = []
    domain_set = []
    admins = []
    domains = []
    ntmlauth = []
    deletelog = []
    policylist = []
    addusers = {}
    delusers = {}
    addgroups = {}
    removegroups = {}
    sids = {}
    hosts = {}
    dcsync_count = {}
    dcsync = {}
    dcshadow_check = []
    dcshadow = {}
    count = 0
    record_sum = 0
    starttime = None
    endtime = None
    # Validate the optional timezone offset / from-date / to-date options.
    if args.timezone:
        try:
            datetime.timezone(datetime.timedelta(hours=args.timezone))
            tzone = args.timezone
            print("[*] Time zone is %s." % args.timezone)
        except:
            sys.exit("[!] Can't load time zone '%s'." % args.timezone)
    else:
        tzone = 0
    if args.fromdate:
        try:
            fdatetime = datetime.datetime.strptime(args.fromdate, "%Y%m%d%H%M%S")
            print("[*] Parse the EVTX from %s." % fdatetime.strftime("%Y-%m-%d %H:%M:%S"))
        except:
            sys.exit("[!] From date does not match format '%Y%m%d%H%M%S'.")
    if args.todate:
        try:
            tdatetime = datetime.datetime.strptime(args.todate, "%Y%m%d%H%M%S")
            print("[*] Parse the EVTX from %s." % tdatetime.strftime("%Y-%m-%d %H:%M:%S"))
        except:
            sys.exit("[!] To date does not match format '%Y%m%d%H%M%S'.")
    # First pass: sanity-check each input file's magic and estimate the
    # total record count (for the progress message only).
    for evtx_file in evtx_list:
        if args.evtx:
            with open(evtx_file, "rb") as fb:
                fb_data = fb.read(8)
                if fb_data != EVTX_HEADER:
                    sys.exit("[!] This file is not EVTX format {0}.".format(evtx_file))
            chunk = -2
            with Evtx(evtx_file) as evtx:
                fh = evtx.get_file_header()
                try:
                    # Walk chunks backwards until one reports a non-zero
                    # last-record number.
                    while True:
                        last_chunk = list(evtx.chunks())[chunk]
                        last_record = last_chunk.file_last_record_number()
                        chunk -= 1
                        if last_record > 0:
                            record_sum = record_sum + last_record
                            break
                except:
                    record_sum = record_sum + fh.next_record_number()
        if args.xmls:
            with open(evtx_file, "r") as fb:
                fb_header = fb.read(6)
                if "<?xml" not in fb_header:
                    sys.exit("[!] This file is not XML format {0}.".format(evtx_file))
                for line in fb:
                    record_sum += line.count("<System>")
    print("[*] Last record number is %i." % record_sum)
    # Parse Event log
    print("[*] Start parsing the EVTX file.")
    for evtx_file in evtx_list:
        print("[*] Parse the EVTX file %s." % evtx_file)
        for node, err in xml_records(evtx_file):
            if err is not None:
                # Skip records whose XML failed to parse.
                continue
            count += 1
            eventid = int(node.xpath("/Event/System/EventID")[0].text)
            if not count % 100:
                sys.stdout.write("\r[*] Now loading %i records." % count)
                sys.stdout.flush()
            if eventid in EVENT_ID:
                logtime = node.xpath("/Event/System/TimeCreated")[0].get("SystemTime")
                # Timestamps appear with either a space or a 'T' separator.
                try:
                    etime = datetime.datetime.strptime(logtime.split(".")[0], "%Y-%m-%d %H:%M:%S") + datetime.timedelta(hours=tzone)
                except:
                    etime = datetime.datetime.strptime(logtime.split(".")[0], "%Y-%m-%dT%H:%M:%S") + datetime.timedelta(hours=tzone)
                # stime: the event time truncated to the hour.
                stime = datetime.datetime(*etime.timetuple()[:4])
                if args.fromdate or args.todate:
                    if args.fromdate and fdatetime > etime:
                        continue
                    if args.todate and tdatetime < etime:
                        endtime = stime
                        break
                # Track the overall observed time range.
                if starttime is None:
                    starttime = stime
                elif starttime > etime:
                    starttime = stime
                if endtime is None:
                    endtime = stime
                elif endtime < etime:
                    endtime = stime
                event_data = node.xpath("/Event/EventData/Data")
                logintype = "-"
                username = "******"
                domain = "-"
                ipaddress = "-"
                hostname = "-"
                status = "-"
                sid = "-"
                authname = "-"
                ###
                # Detect admin users
                #  EventID 4672: Special privileges assigned to new logon
                ###
                if eventid == 4672:
                    for data in event_data:
                        if data.get("Name") in "SubjectUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                    if username not in admins and username != "-":
                        admins.append(username)
                ###
                # Detect removed user account and added user account.
                #  EventID 4720: A user account was created
                #  EventID 4726: A user account was deleted
                ###
                elif eventid in [4720, 4726]:
                    for data in event_data:
                        if data.get("Name") in "TargetUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                    if eventid == 4720:
                        addusers[username] = etime.strftime("%Y-%m-%d %H:%M:%S")
                    else:
                        delusers[username] = etime.strftime("%Y-%m-%d %H:%M:%S")
                ###
                # Detect Audit Policy Change
                #  EventID 4719: System audit policy was changed
                ###
                elif eventid == 4719:
                    for data in event_data:
                        if data.get("Name") in "SubjectUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                        if data.get("Name") in "CategoryId" and data.text is not None and re.search(r"\A%%\d{4}\Z", data.text):
                            category = data.text
                        if data.get("Name") in "SubcategoryGuid" and data.text is not None and re.search(r"\A{[\w\-]*}\Z", data.text):
                            guid = data.text
                    policylist.append([etime.strftime("%Y-%m-%d %H:%M:%S"), username, category, guid.lower(), int(stime.strftime("%s"))])
                ###
                # Detect added users from specific group
                #  EventID 4728: A member was added to a security-enabled global group
                #  EventID 4732: A member was added to a security-enabled local group
                #  EventID 4756: A member was added to a security-enabled universal group
                ###
                elif eventid in [4728, 4732, 4756]:
                    for data in event_data:
                        if data.get("Name") in "TargetUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            groupname = data.text
                        elif data.get("Name") in "MemberSid" and data.text not in "-" and data.text is not None and re.search(r"\AS-[0-9\-]*\Z", data.text):
                            usid = data.text
                    addgroups[usid] = "AddGroup: " + groupname + "(" + etime.strftime("%Y-%m-%d %H:%M:%S") + ") "
                ###
                # Detect removed users from specific group
                #  EventID 4729: A member was removed from a security-enabled global group
                #  EventID 4733: A member was removed from a security-enabled local group
                #  EventID 4757: A member was removed from a security-enabled universal group
                ###
                elif eventid in [4729, 4733, 4757]:
                    for data in event_data:
                        if data.get("Name") in "TargetUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            groupname = data.text
                        elif data.get("Name") in "MemberSid" and data.text not in "-" and data.text is not None and re.search(r"\AS-[0-9\-]*\Z", data.text):
                            usid = data.text
                    removegroups[usid] = "RemoveGroup: " + groupname + "(" + etime.strftime("%Y-%m-%d %H:%M:%S") + ") "
                ###
                # Detect DCSync
                #  EventID 4662: An operation was performed on an object
                ###
                elif eventid == 4662:
                    for data in event_data:
                        if data.get("Name") in "SubjectUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                    # Flag a user after every third 4662 event.
                    dcsync_count[username] = dcsync_count.get(username, 0) + 1
                    if dcsync_count[username] == 3:
                        dcsync[username] = etime.strftime("%Y-%m-%d %H:%M:%S")
                        dcsync_count[username] = 0
                ###
                # Detect DCShadow
                #  EventID 5137: A directory service object was created
                #  EventID 5141: A directory service object was deleted
                ###
                elif eventid in [5137, 5141]:
                    for data in event_data:
                        if data.get("Name") in "SubjectUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                    # Flag when a create and a delete share the same second.
                    if etime.strftime("%Y-%m-%d %H:%M:%S") in dcshadow_check:
                        dcshadow[username] = etime.strftime("%Y-%m-%d %H:%M:%S")
                    else:
                        dcshadow_check.append(etime.strftime("%Y-%m-%d %H:%M:%S"))
                ###
                # Parse logon logs
                #  EventID 4624: An account was successfully logged on
                #  EventID 4625: An account failed to log on
                #  EventID 4768: A Kerberos authentication ticket (TGT) was requested
                #  EventID 4769: A Kerberos service ticket was requested
                #  EventID 4776: The domain controller attempted to validate the credentials for an account
                ###
                else:
                    for data in event_data:
                        # parse IP Address
                        if data.get("Name") in ["IpAddress", "Workstation"] and data.text is not None and (not re.search(HCHECK, data.text) or re.search(IPv4_PATTERN, data.text) or re.search(r"\A::ffff:\d+\.\d+\.\d+\.\d+\Z", data.text) or re.search(IPv6_PATTERN, data.text)):
                            ipaddress = data.text.split("@")[0]
                            ipaddress = ipaddress.lower().replace("::ffff:", "")
                            ipaddress = ipaddress.replace("\\", "")
                        # Parse hostname
                        if data.get("Name") == "WorkstationName" and data.text is not None and (not re.search(HCHECK, data.text) or re.search(IPv4_PATTERN, data.text) or re.search(r"\A::ffff:\d+\.\d+\.\d+\.\d+\Z", data.text) or re.search(IPv6_PATTERN, data.text)):
                            hostname = data.text.split("@")[0]
                            hostname = hostname.lower().replace("::ffff:", "")
                            hostname = hostname.replace("\\", "")
                        # Parse username
                        if data.get("Name") in "TargetUserName" and data.text is not None and not re.search(UCHECK, data.text):
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                        # Parse targeted domain name
                        if data.get("Name") in "TargetDomainName" and data.text is not None and not re.search(HCHECK, data.text):
                            domain = data.text
                        # parse trageted user SID
                        if data.get("Name") in ["TargetUserSid", "TargetSid"] and data.text is not None and re.search(r"\AS-[0-9\-]*\Z", data.text):
                            sid = data.text
                        # parse lonon type
                        if data.get("Name") in "LogonType" and re.search(r"\A\d{1,2}\Z", data.text):
                            logintype = int(data.text)
                        # parse status
                        if data.get("Name") in "Status" and re.search(r"\A0x\w{8}\Z", data.text):
                            status = data.text
                        # parse Authentication package name
                        if data.get("Name") in "AuthenticationPackageName" and re.search(r"\A\w*\Z", data.text):
                            authname = data.text
                    # Keep only events with a usable user and remote endpoint.
                    if username != "-" and username != "anonymous logon" and ipaddress != "::1" and ipaddress != "127.0.0.1" and (ipaddress != "-" or hostname != "-"):
                        # generate pandas series
                        if ipaddress != "-":
                            event_series = pd.Series([eventid, ipaddress, username, logintype, status, authname, int(stime.strftime("%s"))], index=event_set.columns)
                            ml_series = pd.Series([etime.strftime("%Y-%m-%d %H:%M:%S"), username, ipaddress, eventid], index=ml_frame.columns)
                        else:
                            event_series = pd.Series([eventid, hostname, username, logintype, status, authname, int(stime.strftime("%s"))], index=event_set.columns)
                            ml_series = pd.Series([etime.strftime("%Y-%m-%d %H:%M:%S"), username, hostname, eventid], index=ml_frame.columns)
                        # append pandas series to dataframe
                        event_set = event_set.append(event_series, ignore_index=True)
                        ml_frame = ml_frame.append(ml_series, ignore_index=True)
                        # print("%s,%i,%s,%s,%s,%s" % (eventid, ipaddress, username, comment, logintype))
                        count_series = pd.Series([stime.strftime("%Y-%m-%d %H:%M:%S"), eventid, username], index=count_set.columns)
                        count_set = count_set.append(count_series, ignore_index=True)
                        # print("%s,%s" % (stime.strftime("%Y-%m-%d %H:%M:%S"), username))
                        if domain != "-":
                            domain_set.append([username, domain])
                        if username not in username_set:
                            username_set.append(username)
                        if domain not in domains and domain != "-":
                            domains.append(domain)
                        if sid != "-":
                            sids[username] = sid
                        if hostname != "-" and ipaddress != "-":
                            hosts[ipaddress] = hostname
                        if authname in "NTML" and authname not in ntmlauth:
                            ntmlauth.append(username)
            ###
            # Detect the audit log deletion
            #  EventID 1102: The audit log was cleared
            ###
            if eventid == 1102:
                logtime = node.xpath("/Event/System/TimeCreated")[0].get("SystemTime")
                try:
                    etime = datetime.datetime.strptime(logtime.split(".")[0], "%Y-%m-%d %H:%M:%S") + datetime.timedelta(hours=tzone)
                except:
                    etime = datetime.datetime.strptime(logtime.split(".")[0], "%Y-%m-%dT%H:%M:%S") + datetime.timedelta(hours=tzone)
                deletelog.append(etime.strftime("%Y-%m-%d %H:%M:%S"))
                # 1102 stores actor details under UserData, not EventData.
                namespace = "http://manifests.microsoft.com/win/2004/08/windows/eventlog"
                user_data = node.xpath("/Event/UserData/ns:LogFileCleared/ns:SubjectUserName", namespaces={"ns": namespace})
                domain_data = node.xpath("/Event/UserData/ns:LogFileCleared/ns:SubjectDomainName", namespaces={"ns": namespace})
                if user_data[0].text is not None:
                    username = user_data[0].text.split("@")[0]
                    if username[-1:] not in "$":
                        deletelog.append(username.lower())
                    else:
                        deletelog.append("-")
                else:
                    deletelog.append("-")
                if domain_data[0].text is not None:
                    deletelog.append(domain_data[0].text)
                else:
                    deletelog.append("-")
    print("\n[*] Load finished.")
    print("[*] Total Event log is %i." % count)
    if not username_set:
        sys.exit("[!] This event log did not include logs to be visualized. Please check the details of the event log.")
    tohours = int((endtime - starttime).total_seconds() / 3600)
    # Replace IP addresses with hostnames where a mapping was seen.
    if hosts:
        event_set = event_set.replace(hosts)
    # Aggregate counts per (event, endpoint, user, ...) with and without date.
    event_set_bydate = event_set
    event_set_bydate["count"] = event_set_bydate.groupby(["eventid", "ipaddress", "username", "logintype", "status", "authname", "date"])["eventid"].transform("count")
    event_set_bydate = event_set_bydate.drop_duplicates()
    event_set = event_set.drop("date", axis=1)
    event_set["count"] = event_set.groupby(["eventid", "ipaddress", "username", "logintype", "status", "authname"])["eventid"].transform("count")
    event_set = event_set.drop_duplicates()
    count_set["count"] = count_set.groupby(["dates", "eventid", "username"])["dates"].transform("count")
    count_set = count_set.drop_duplicates()
    domain_set_uniq = list(map(list, set(map(tuple, domain_set))))
    # Learning event logs using Hidden Markov Model
    if hosts:
        ml_frame = ml_frame.replace(hosts)
    ml_frame = ml_frame.sort_values(by="date")
    if args.learn:
        print("[*] Learning event logs using Hidden Markov Model.")
        learnhmm(ml_frame, username_set, datetime.datetime(*starttime.timetuple()[:3]))
    # Calculate ChangeFinder
    print("[*] Calculate ChangeFinder.")
    timelines, detects, detect_cf = adetection(count_set, username_set, starttime, tohours)
    # Calculate Hidden Markov Model
    print("[*] Calculate Hidden Markov Model.")
    detect_hmm = decodehmm(ml_frame, username_set, datetime.datetime(*starttime.timetuple()[:3]))
    # Calculate PageRank
    print("[*] Calculate PageRank.")
    ranks = pagerank(event_set, admins, detect_hmm, detect_cf, ntmlauth)
    # Create node
    print("[*] Creating a graph data.")
    try:
        graph_http = "http://" + NEO4J_USER + ":" + NEO4J_PASSWORD + "@" + NEO4J_SERVER + ":" + NEO4J_PORT + "/db/data/"
        GRAPH = Graph(graph_http)
    except:
        sys.exit("[!] Can't connect Neo4j Database.")
    tx = GRAPH.begin()
    hosts_inv = {v: k for k, v in hosts.items()}
    for ipaddress in event_set["ipaddress"].drop_duplicates():
        if ipaddress in hosts_inv:
            hostname = hosts_inv[ipaddress]
        else:
            hostname = ipaddress
        # add the IPAddress node to neo4j
        tx.append(statement_ip, {"IP": ipaddress, "rank": ranks[ipaddress], "hostname": hostname})
    i = 0
    for username in username_set:
        if username in sids:
            sid = sids[username]
        else:
            sid = "-"
        if username in admins:
            rights = "system"
        else:
            rights = "user"
        # Compose the user's account-management status string.
        ustatus = ""
        if username in addusers:
            ustatus += "Created(" + addusers[username] + ") "
        if username in delusers:
            ustatus += "Deleted(" + delusers[username] + ") "
        if sid in addgroups:
            ustatus += addgroups[sid]
        if sid in removegroups:
            ustatus += removegroups[sid]
        if username in dcsync:
            ustatus += "DCSync(" + dcsync[username] + ") "
        if username in dcshadow:
            ustatus += "DCShadow(" + dcshadow[username] + ") "
        if not ustatus:
            ustatus = "-"
        # add the username node to neo4j
        # (timelines holds six per-user count series in consecutive rows)
        tx.append(statement_user, {"user": username[:-1], "rank": ranks[username], "rights": rights, "sid": sid, "status": ustatus,
                                   "counts": ",".join(map(str, timelines[i*6])),
                                   "counts4624": ",".join(map(str, timelines[i*6+1])),
                                   "counts4625": ",".join(map(str, timelines[i*6+2])),
                                   "counts4768": ",".join(map(str, timelines[i*6+3])),
                                   "counts4769": ",".join(map(str, timelines[i*6+4])),
                                   "counts4776": ",".join(map(str, timelines[i*6+5])),
                                   "detect": ",".join(map(str, detects[i]))})
        i += 1
    for domain in domains:
        # add the domain node to neo4j
        tx.append(statement_domain, {"domain": domain})
    for _, events in event_set_bydate.iterrows():
        # add the (username)-(event)-(ip) link to neo4j
        tx.append(statement_r, {"user": events["username"][:-1], "IP": events["ipaddress"], "id": events["eventid"], "logintype": events["logintype"], "status": events["status"], "count": events["count"], "authname": events["authname"], "date": events["date"]})
    for username, domain in domain_set_uniq:
        # add (username)-()-(domain) link to neo4j
        tx.append(statement_dr, {"user": username[:-1], "domain": domain})
    # add the date node to neo4j
    tx.append(statement_date, {"Daterange": "Daterange", "start": datetime.datetime(*starttime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S"), "end": datetime.datetime(*endtime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S")})
    if len(deletelog):
        # add the delete flag node to neo4j
        tx.append(statement_del, {"deletetime": deletelog[0], "user": deletelog[1], "domain": deletelog[2]})
    if len(policylist):
        id = 0
        for policy in policylist:
            if policy[2] in CATEGORY_IDs:
                category = CATEGORY_IDs[policy[2]]
            else:
                category = policy[2]
            if policy[3] in AUDITING_CONSTANTS:
                sub = AUDITING_CONSTANTS[policy[3]]
            else:
                sub = policy[3]
            username = policy[1]
            # add the policy id node to neo4j
            tx.append(statement_pl, {"id": id, "changetime": policy[0], "category": category, "sub": sub})
            # add (username)-(policy)-(id) link to neo4j
            tx.append(statement_pr, {"user": username[:-1], "id": id, "date": policy[4]})
            id += 1
    tx.process()
    tx.commit()
    print("[*] Creation of a graph data finished.")
import lxml.etree as et
from Evtx.Evtx import Evtx
from eventcodes import EVENT_CODES

# Walk every record in the log and print a short summary of each,
# skipping events whose EventID is 10016.
with Evtx('WindowsEvents.evtx') as ev:
    for i, rec in enumerate(ev.records(), 1):  # loop over records
        xml_element = rec.lxml()  # get lxml Element obj
        ns = xml_element.nsmap  # get XML namespace
        # Dump XML of entire record:
        # print(et.tostring(xml_element, pretty_print=True).decode())
        # print("-" * 60)
        # if i == 10:
        #     break
        # continue
        event_id = xml_element.findtext('.//EventID', namespaces=ns)
        if event_id == "10016":
            continue
        print(f"Record {i}:")
        # Execution element may be absent; fall back to 'N/A'.
        execution_element = xml_element.find('.//Execution', namespaces=ns)
        if execution_element is not None:
            process_id = execution_element.get('ProcessID')
        else:
            process_id = 'N/A'
        print("\tProcess ID:", process_id)
        computer = xml_element.findtext('.//Computer', namespaces=ns)
        print("\tComputer:", computer)
        # NOTE(review): record_id is assigned but not used in the visible
        # portion of this script — possibly used further down, or dead.
        record_id = xml_element.findtext('.//EventRecordID', namespaces=ns)
def parse_evtx(evtx_list):
    """Parse Windows event logs (EVTX binary or exported XML) and load the
    results into a Neo4j graph.

    evtx_list: list of file paths to parse.

    Relies on module-level state visible elsewhere in the file: ``args``
    (CLI options), ``EVTX_HEADER``, ``EVENT_ID``, ``CATEGORY_IDs``,
    ``AUDITING_CONSTANTS``, the ``statement_*`` Cypher templates and the
    ``NEO4J_*`` connection constants, plus the helpers ``xml_records``,
    ``learnhmm``, ``decodehmm``, ``adetection`` and ``pagerank``.
    Exits the whole process via sys.exit() on malformed input, empty result
    sets, or Neo4j connection failure.
    """
    # Accumulators for the logon-event table, the per-day count table and
    # the frame fed to the HMM learner.
    event_set = pd.DataFrame(index=[], columns=[
        "eventid", "ipaddress", "username", "logintype", "status", "authname"
    ])
    count_set = pd.DataFrame(index=[], columns=["dates", "eventid", "username"])
    ml_frame = pd.DataFrame(index=[], columns=["date", "user", "host", "id"])
    username_set = []
    domain_set = []
    admins = []          # users seen with special privileges (4672)
    domains = []
    ntmlauth = []        # users authenticated via NTLM-family packages
    deletelog = []       # [time, user, domain] when a 1102 log-clear is seen
    policylist = []
    addusers = {}        # username -> creation time (4720)
    delusers = {}        # username -> deletion time (4726)
    addgroups = {}       # member SID -> "AddGroup: ..." annotation
    removegroups = {}    # member SID -> "RemoveGroup: ..." annotation
    sids = {}            # username -> SID
    hosts = {}           # workstation name -> ip address
    dcsync_count = {}
    dcsync = {}          # username -> time of suspected DCSync activity
    dcshadow_check = []
    dcshadow = {}        # username -> time of suspected DCShadow activity
    count = 0
    record_sum = 0
    starttime = None
    endtime = None
    # Validate the user-supplied UTC offset; tzone is applied as a plain
    # timedelta to every parsed timestamp below.
    if args.timezone:
        try:
            datetime.timezone(datetime.timedelta(hours=args.timezone))
            tzone = args.timezone
            print("[*] Time zone is %s." % args.timezone)
        except:  # bare except: any invalid offset aborts the run
            sys.exit("[!] Can't load time zone '%s'." % args.timezone)
    else:
        tzone = 0
    if args.fromdate:
        try:
            fdatetime = datetime.datetime.strptime(args.fromdate, "%Y%m%d%H%M%S")
            print("[*] Parse the EVTX from %s." %
                  fdatetime.strftime("%Y-%m-%d %H:%M:%S"))
        except:
            sys.exit("[!] From date does not match format '%Y%m%d%H%M%S'.")
    if args.todate:
        try:
            tdatetime = datetime.datetime.strptime(args.todate, "%Y%m%d%H%M%S")
            # NOTE(review): message says "from" for the --todate bound too;
            # presumably a copy-paste slip — confirm before changing.
            print("[*] Parse the EVTX from %s." %
                  tdatetime.strftime("%Y-%m-%d %H:%M:%S"))
        except:
            sys.exit("[!] To date does not match format '%Y%m%d%H%M%S'.")
    # First pass: estimate the total record count for progress reporting.
    for evtx_file in evtx_list:
        if args.evtx:
            # Sanity-check the 8-byte EVTX magic before parsing.
            with open(evtx_file, "rb") as fb:
                fb_data = fb.read()[0:8]
                if fb_data != EVTX_HEADER:
                    sys.exit("[!] This file is not EVTX format {0}.".format(
                        evtx_file))
            chunk = -2
            with Evtx(evtx_file) as evtx:
                fh = evtx.get_file_header()
                try:
                    # Walk chunks from the end until one reports a non-zero
                    # last-record number.
                    while True:
                        last_chunk = list(evtx.chunks())[chunk]
                        last_record = last_chunk.file_last_record_number()
                        chunk -= 1
                        if last_record > 0:
                            record_sum = record_sum + last_record
                            break
                except:  # fall back to the header's next-record counter
                    record_sum = record_sum + fh.next_record_number()
        if args.xmls:
            with open(evtx_file, "r") as fb:
                fb_data = fb.read()
                if "<?xml" not in fb_data[0:6]:
                    sys.exit("[!] This file is not XML format {0}.".format(
                        evtx_file))
                record_sum += fb_data.count("<System>")
                del fb_data
    print("[*] Last record number is %i." % record_sum)

    # Parse Event log
    print("[*] Start parsing the EVTX file.")
    for evtx_file in evtx_list:
        print("[*] Parse the EVTX file %s." % evtx_file)
        for node, err in xml_records(evtx_file):
            if err is not None:
                # Records that failed XML parsing are skipped silently.
                continue
            count += 1
            eventid = int(node.xpath("/Event/System/EventID")[0].text)
            if not count % 100:
                sys.stdout.write("\r[*] Now loading %i records." % count)
                sys.stdout.flush()
            if eventid in EVENT_ID:
                logtime = node.xpath("/Event/System/TimeCreated")[0].get(
                    "SystemTime")
                # SystemTime appears either space- or 'T'-separated; try both.
                try:
                    etime = datetime.datetime.strptime(
                        logtime.split(".")[0],
                        "%Y-%m-%d %H:%M:%S") + datetime.timedelta(hours=tzone)
                except:
                    etime = datetime.datetime.strptime(
                        logtime.split(".")[0],
                        "%Y-%m-%dT%H:%M:%S") + datetime.timedelta(hours=tzone)
                # stime = timestamp truncated to the hour (timetuple()[:4]).
                stime = datetime.datetime(*etime.timetuple()[:4])
                if args.fromdate or args.todate:
                    if args.fromdate and fdatetime > etime:
                        continue
                    if args.todate and tdatetime < etime:
                        endtime = stime
                        break
                # Track the overall observed time range.
                if starttime is None:
                    starttime = stime
                elif starttime > etime:
                    starttime = stime
                if endtime is None:
                    endtime = stime
                elif endtime < etime:
                    endtime = stime
                event_data = node.xpath("/Event/EventData/Data")
                logintype = "-"
                username = "******"
                domain = "-"
                ipaddress = "-"
                hostname = "-"
                status = "-"
                sid = "-"
                authname = "-"
                # NOTE(review): throughout this function, tests of the form
                # data.get("Name") in "SubjectUserName" are substring checks,
                # not equality — e.g. "Name" or "UserName" would also match.
                # Presumably intended as ==; confirm before changing.
                if eventid == 4672:
                    # Special privileges assigned to new logon.
                    for data in event_data:
                        if data.get(
                                "Name"
                        ) in "SubjectUserName" and data.text != None:
                            username = data.text.split("@")[0]
                            # Trailing "$" marks machine accounts; note that
                            # an empty slice "" is "in" any string too.
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                    if username not in admins and username != "-":
                        admins.append(username)
                elif eventid in [4720, 4726]:
                    # User account created (4720) / deleted (4726).
                    for data in event_data:
                        if data.get(
                                "Name"
                        ) in "TargetUserName" and data.text != None:
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                    if eventid == 4720:
                        addusers[username] = etime.strftime(
                            "%Y-%m-%d %H:%M:%S")
                    else:
                        delusers[username] = etime.strftime(
                            "%Y-%m-%d %H:%M:%S")
                elif eventid == 4719:
                    # System audit policy was changed.
                    for data in event_data:
                        if data.get(
                                "Name"
                        ) in "SubjectUserName" and data.text != None:
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                        if data.get(
                                "Name") in "CategoryId" and data.text != None:
                            category = data.text
                        if data.get(
                                "Name"
                        ) in "SubcategoryGuid" and data.text != None:
                            guid = data.text
                    policylist.append([
                        etime.strftime("%Y-%m-%d %H:%M:%S"), username,
                        category, guid.lower()
                    ])
                elif eventid in [4728, 4732, 4756]:
                    # Member added to a (global/local/universal) group.
                    for data in event_data:
                        if data.get(
                                "Name"
                        ) in "TargetUserName" and data.text != None:
                            groupname = data.text
                        elif data.get(
                                "Name"
                        ) in "MemberSid" and data.text not in "-" and data.text != None:
                            usid = data.text
                    addgroups[
                        usid] = "AddGroup: " + groupname + "(" + etime.strftime(
                            "%Y-%m-%d %H:%M:%S") + ") "
                elif eventid in [4729, 4733, 4757]:
                    # Member removed from a group.
                    for data in event_data:
                        if data.get(
                                "Name"
                        ) in "TargetUserName" and data.text != None:
                            groupname = data.text
                        elif data.get(
                                "Name"
                        ) in "MemberSid" and data.text not in "-" and data.text != None:
                            usid = data.text
                    removegroups[
                        usid] = "RemoveGroup: " + groupname + "(" + etime.strftime(
                            "%Y-%m-%d %H:%M:%S") + ") "
                elif eventid == 4662:
                    # Operation on an AD object: three hits flag DCSync.
                    for data in event_data:
                        if data.get(
                                "Name"
                        ) in "SubjectUserName" and data.text != None:
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                    dcsync_count[username] = dcsync_count.get(username, 0) + 1
                    if dcsync_count[username] == 3:
                        dcsync[username] = etime.strftime("%Y-%m-%d %H:%M:%S")
                        dcsync_count[username] = 0
                elif eventid in [5137, 5141]:
                    # Directory object created/deleted: a second event at the
                    # exact same second flags DCShadow.
                    for data in event_data:
                        if data.get(
                                "Name"
                        ) in "SubjectUserName" and data.text != None:
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                    if etime.strftime(
                            "%Y-%m-%d %H:%M:%S") in dcshadow_check:
                        dcshadow[username] = etime.strftime(
                            "%Y-%m-%d %H:%M:%S")
                    else:
                        dcshadow_check.append(
                            etime.strftime("%Y-%m-%d %H:%M:%S"))
                else:
                    # Ordinary logon-family events: pull out source host,
                    # account, domain, SID, logon type, status and auth pkg.
                    for data in event_data:
                        if data.get("Name") in ["IpAddress", "Workstation"
                                                ] and data.text != None:
                            ipaddress = data.text.split("@")[0]
                            ipaddress = ipaddress.lower().replace(
                                "::ffff:", "")
                            ipaddress = ipaddress.replace("\\", "")
                        if data.get(
                                "Name"
                        ) == "WorkstationName" and data.text != None:
                            hostname = data.text.split("@")[0]
                            hostname = hostname.lower().replace("::ffff:", "")
                            hostname = hostname.replace("\\", "")
                        if data.get(
                                "Name"
                        ) in "TargetUserName" and data.text != None:
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                        if data.get(
                                "Name"
                        ) in "TargetDomainName" and data.text != None:
                            domain = data.text
                        if data.get("Name") in [
                                "TargetUserSid", "TargetSid"
                        ] and data.text != None and data.text[0:2] in "S-1":
                            sid = data.text
                        if data.get("Name") in "LogonType":
                            logintype = int(data.text)
                        if data.get("Name") in "Status":
                            status = data.text
                        if data.get("Name") in "AuthenticationPackageName":
                            authname = data.text
                    # Keep only events with a usable user and a non-loopback
                    # source (either an IP or a workstation name).
                    if username != "-" and ipaddress != "::1" and ipaddress != "127.0.0.1" and (
                            ipaddress != "-" or hostname != "-"):
                        if ipaddress != "-":
                            event_series = pd.Series([
                                eventid, ipaddress, username, logintype,
                                status, authname
                            ], index=event_set.columns)
                            ml_series = pd.Series([
                                etime.strftime("%Y-%m-%d %H:%M:%S"), username,
                                ipaddress, eventid
                            ], index=ml_frame.columns)
                        else:
                            event_series = pd.Series([
                                eventid, hostname, username, logintype,
                                status, authname
                            ], index=event_set.columns)
                            ml_series = pd.Series([
                                etime.strftime("%Y-%m-%d %H:%M:%S"), username,
                                hostname, eventid
                            ], index=ml_frame.columns)
                        event_set = event_set.append(event_series,
                                                     ignore_index=True)
                        ml_frame = ml_frame.append(ml_series,
                                                   ignore_index=True)
                        # print("%s,%i,%s,%s,%s,%s" % (eventid, ipaddress, username, comment, logintype))
                        count_series = pd.Series([
                            stime.strftime("%Y-%m-%d %H:%M:%S"), eventid,
                            username
                        ], index=count_set.columns)
                        count_set = count_set.append(count_series,
                                                     ignore_index=True)
                        # print("%s,%s" % (stime.strftime("%Y-%m-%d %H:%M:%S"), username))
                        if domain != "-":
                            domain_set.append([username, domain])
                        if username not in username_set:
                            username_set.append(username)
                        if domain not in domains and domain != "-":
                            domains.append(domain)
                        if sid != "-":
                            sids[username] = sid
                        if hostname != "-" and ipaddress != "-":
                            hosts[hostname] = ipaddress
                        # NOTE(review): "NTML" is likely a typo for "NTLM",
                        # and the membership test checks authname but appends
                        # username — presumably both should use username;
                        # confirm against upstream before changing.
                        if authname in "NTML" and authname not in ntmlauth:
                            ntmlauth.append(username)
            if eventid == 1102:
                # The audit log was cleared: record when, by whom, and from
                # which domain (read from the UserData section).
                logtime = node.xpath("/Event/System/TimeCreated")[0].get(
                    "SystemTime")
                try:
                    etime = datetime.datetime.strptime(
                        logtime.split(".")[0],
                        "%Y-%m-%d %H:%M:%S") + datetime.timedelta(hours=tzone)
                except:
                    etime = datetime.datetime.strptime(
                        logtime.split(".")[0],
                        "%Y-%m-%dT%H:%M:%S") + datetime.timedelta(hours=tzone)
                deletelog.append(etime.strftime("%Y-%m-%d %H:%M:%S"))
                namespace = "http://manifests.microsoft.com/win/2004/08/windows/eventlog"
                user_data = node.xpath(
                    "/Event/UserData/ns:LogFileCleared/ns:SubjectUserName",
                    namespaces={"ns": namespace})
                domain_data = node.xpath(
                    "/Event/UserData/ns:LogFileCleared/ns:SubjectDomainName",
                    namespaces={"ns": namespace})
                if user_data[0].text != None:
                    username = user_data[0].text.split("@")[0]
                    if username[-1:] not in "$":
                        deletelog.append(username.lower())
                    else:
                        deletelog.append("-")
                else:
                    deletelog.append("-")
                if domain_data[0].text != None:
                    deletelog.append(domain_data[0].text)
                else:
                    deletelog.append("-")
    print("\n[*] Load finished.")
    print("[*] Total Event log is %i." % count)
    if not username_set:
        sys.exit(
            "[!] This event log did not include logs to be visualized. Please check the details of the event log."
        )
    # Span of the observed window in whole hours.
    tohours = int((endtime - starttime).total_seconds() / 3600)
    # Replace workstation names with their resolved IP addresses.
    if hosts:
        event_set = event_set.replace(hosts)
    # Collapse duplicate rows into a single row plus a "count" column.
    event_set["count"] = event_set.groupby([
        "eventid", "ipaddress", "username", "logintype", "status", "authname"
    ])["eventid"].transform("count")
    event_set = event_set.drop_duplicates()
    count_set["count"] = count_set.groupby(["dates", "eventid", "username"
                                            ])["dates"].transform("count")
    count_set = count_set.drop_duplicates()
    domain_set_uniq = list(map(list, set(map(tuple, domain_set))))

    # Learning event logs using Hidden Markov Model
    if hosts:
        ml_frame = ml_frame.replace(hosts)
    ml_frame = ml_frame.sort_values(by="date")
    if args.learn:
        print("[*] Learning event logs using Hidden Markov Model.")
        learnhmm(ml_frame, username_set,
                 datetime.datetime(*starttime.timetuple()[:3]))

    # Calculate ChangeFinder
    print("[*] Calculate ChangeFinder.")
    timelines, detects, detect_cf = adetection(count_set, username_set,
                                               starttime, tohours)

    # Calculate Hidden Markov Model
    print("[*] Calculate Hidden Markov Model.")
    detect_hmm = decodehmm(ml_frame, username_set,
                           datetime.datetime(*starttime.timetuple()[:3]))

    # Calculate PageRank
    print("[*] Calculate PageRank.")
    ranks = pagerank(event_set, admins, detect_hmm, detect_cf, ntmlauth)

    # Create node
    print("[*] Creating a graph data.")
    try:
        # NOTE(review): credentials are embedded in the URL; fine for a
        # local tool, but worth confirming this never reaches logs.
        graph_http = "http://" + NEO4J_USER + ":" + NEO4J_PASSWORD + "@" + NEO4J_SERVER + ":" + NEO4J_PORT + "/db/data/"
        GRAPH = Graph(graph_http)
    except:
        sys.exit("[!] Can't connect Neo4j Database.")
    tx = GRAPH.begin()
    hosts_inv = {v: k for k, v in hosts.items()}
    for ipaddress in event_set["ipaddress"].drop_duplicates():
        if ipaddress in hosts_inv:
            hostname = hosts_inv[ipaddress]
        else:
            hostname = ipaddress
        tx.append(statement_ip, {
            "IP": ipaddress,
            "rank": ranks[ipaddress],
            "hostname": hostname
        })
    i = 0
    # One user node per username; timelines holds six parallel per-user
    # series (all events plus the five tracked event IDs).
    for username in username_set:
        if username in sids:
            sid = sids[username]
        else:
            sid = "-"
        if username in admins:
            rights = "system"
        else:
            rights = "user"
        ustatus = ""
        if username in addusers:
            ustatus += "Created(" + addusers[username] + ") "
        if username in delusers:
            ustatus += "Deleted(" + delusers[username] + ") "
        if sid in addgroups:
            ustatus += addgroups[sid]
        if sid in removegroups:
            ustatus += removegroups[sid]
        if username in dcsync:
            ustatus += "DCSync(" + dcsync[username] + ") "
        if username in dcshadow:
            ustatus += "DCShadow(" + dcshadow[username] + ") "
        if not ustatus:
            ustatus = "-"
        # username[:-1] strips the "@" suffix added during parsing.
        tx.append(
            statement_user, {
                "user": username[:-1],
                "rank": ranks[username],
                "rights": rights,
                "sid": sid,
                "status": ustatus,
                "counts": ",".join(map(str, timelines[i * 6])),
                "counts4624": ",".join(map(str, timelines[i * 6 + 1])),
                "counts4625": ",".join(map(str, timelines[i * 6 + 2])),
                "counts4768": ",".join(map(str, timelines[i * 6 + 3])),
                "counts4769": ",".join(map(str, timelines[i * 6 + 4])),
                "counts4776": ",".join(map(str, timelines[i * 6 + 5])),
                "detect": ",".join(map(str, detects[i]))
            })
        i += 1
    for domain in domains:
        tx.append(statement_domain, {"domain": domain})
    for _, events in event_set.iterrows():
        tx.append(
            statement_r, {
                "user": events["username"][:-1],
                "IP": events["ipaddress"],
                "id": events["eventid"],
                "logintype": events["logintype"],
                "status": events["status"],
                "count": events["count"],
                "authname": events["authname"]
            })
    for username, domain in domain_set_uniq:
        tx.append(statement_dr, {"user": username[:-1], "domain": domain})
    tx.append(
        statement_date, {
            "Daterange":
            "Daterange",
            "start":
            datetime.datetime(
                *starttime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S"),
            "end":
            datetime.datetime(
                *endtime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S")
        })
    if len(deletelog):
        tx.append(
            statement_del, {
                "deletetime": deletelog[0],
                "user": deletelog[1],
                "domain": deletelog[2]
            })
    if len(policylist):
        # NOTE: "id" shadows the builtin; left unchanged (doc-only edit).
        id = 0
        for policy in policylist:
            if policy[2] in CATEGORY_IDs:
                category = CATEGORY_IDs[policy[2]]
            else:
                category = policy[2]
            if policy[3] in AUDITING_CONSTANTS:
                sub = AUDITING_CONSTANTS[policy[3]]
            else:
                sub = policy[3]
            username = policy[1]
            tx.append(
                statement_pl, {
                    "id": id,
                    "changetime": policy[0],
                    "category": category,
                    "sub": sub
                })
            tx.append(statement_pr, {"user": username[:-1], "id": id})
            id += 1
    tx.process()
    tx.commit()
    print("[*] Creation of a graph data finished.")
def parse_evtx(evtx_list, GRAPH):
    """Parse Windows event logs (EVTX binary or exported XML) and load the
    results into the given Neo4j ``GRAPH``.

    evtx_list: list of file paths to parse.
    GRAPH: an open py2neo Graph connection.

    Relies on module-level state: ``args`` (CLI options), ``EVTX_HEADER``,
    ``EVENT_ID``, the ``statement_*`` Cypher templates, and the helpers
    ``xml_records``, ``adetection`` and ``pagerank``. Exits the process via
    sys.exit() on malformed input.
    """
    # Accumulators: raw logon rows and per-hour per-user counts.
    event_set = pd.DataFrame(index=[], columns=["eventid", "ipaddress", "username", "logintype", "status", "authname"])
    count_set = pd.DataFrame(index=[], columns=["dates", "eventid", "username"])
    username_set = []
    domain_set = []
    admins = []      # users seen with special privileges (4672)
    domains = []
    sids = {}        # username -> SID
    hosts = {}       # workstation name -> ip address
    count = 0
    record_sum = 0
    starttime = None
    endtime = None
    # Validate the UTC offset; it is applied as a plain timedelta below.
    if args.timezone:
        try:
            datetime.timezone(datetime.timedelta(hours=args.timezone))
            tzone = args.timezone
            print("[*] Time zone is %s." % args.timezone)
        except:  # bare except: any invalid offset aborts the run
            sys.exit("[!] Can't load time zone '%s'." % args.timezone)
    else:
        tzone = 0
    if args.fromdate:
        try:
            fdatetime = datetime.datetime.strptime(args.fromdate, "%Y%m%d%H%M%S")
            print("[*] Parse the EVTX from %s." % fdatetime.strftime("%Y-%m-%d %H:%M:%S"))
        except:
            sys.exit("[!] From date does not match format '%Y%m%d%H%M%S'.")
    if args.todate:
        try:
            tdatetime = datetime.datetime.strptime(args.todate, "%Y%m%d%H%M%S")
            # NOTE(review): message says "from" for the --todate bound too;
            # presumably a copy-paste slip — confirm before changing.
            print("[*] Parse the EVTX from %s." % tdatetime.strftime("%Y-%m-%d %H:%M:%S"))
        except:
            sys.exit("[!] To date does not match format '%Y%m%d%H%M%S'.")
    # First pass: estimate the total record count for progress reporting.
    for evtx_file in evtx_list:
        if args.evtx:
            # Sanity-check the 8-byte EVTX magic before parsing.
            with open(evtx_file, "rb") as fb:
                fb_data = fb.read()[0:8]
                if fb_data != EVTX_HEADER:
                    sys.exit("[!] This file is not EVTX format {0}.".format(evtx_file))
            chunk = -2
            with Evtx(evtx_file) as evtx:
                fh = evtx.get_file_header()
                try:
                    # Walk chunks from the end until one reports a non-zero
                    # last-record number.
                    while True:
                        last_chunk = list(evtx.chunks())[chunk]
                        last_record = last_chunk.file_last_record_number()
                        chunk -= 1
                        if last_record > 0:
                            record_sum = record_sum + last_record
                            break
                except:  # fall back to the header's next-record counter
                    record_sum = record_sum + fh.next_record_number()
        if args.xmls:
            with open(evtx_file, "r") as fb:
                fb_data = fb.read()
                if "<?xml" not in fb_data[0:6]:
                    sys.exit("[!] This file is not XML format {0}.".format(evtx_file))
                record_sum += fb_data.count("<System>")
                del fb_data
    print("[*] Last record number is %i." % record_sum)

    # Parse Event log
    print("[*] Start parsing the EVTX file.")
    for evtx_file in evtx_list:
        print("[*] Parse the EVTX file %s."
              % evtx_file)
        for node, err in xml_records(evtx_file):
            if err is not None:
                # Records that failed XML parsing are skipped silently.
                continue
            count += 1
            eventid = int(node.xpath("/Event/System/EventID")[0].text)
            if not count % 100:
                sys.stdout.write("\r[*] Now loading %i records." % count)
                sys.stdout.flush()
            if eventid in EVENT_ID:
                logtime = node.xpath("/Event/System/TimeCreated")[0].get("SystemTime")
                # SystemTime appears either space- or 'T'-separated; try both.
                try:
                    etime = datetime.datetime.strptime(logtime.split(".")[0], "%Y-%m-%d %H:%M:%S") + datetime.timedelta(hours=tzone)
                except:
                    etime = datetime.datetime.strptime(logtime.split(".")[0], "%Y-%m-%dT%H:%M:%S") + datetime.timedelta(hours=tzone)
                # stime = timestamp truncated to the hour (timetuple()[:4]).
                stime = datetime.datetime(*etime.timetuple()[:4])
                if args.fromdate or args.todate:
                    if args.fromdate and fdatetime > etime:
                        continue
                    if args.todate and tdatetime < etime:
                        endtime = stime
                        break
                # Track the overall observed time range.
                if starttime is None:
                    starttime = stime
                elif starttime > etime:
                    starttime = stime
                if endtime is None:
                    endtime = stime
                elif endtime < etime:
                    endtime = stime
                event_data = node.xpath("/Event/EventData/Data")
                logintype = "-"
                username = "******"
                domain = "-"
                ipaddress = "-"
                hostname = "-"
                status = "-"
                sid = "-"
                authname = "-"
                # NOTE(review): data.get("Name") in "SubjectUserName" is a
                # substring test, not equality — "Name" would also match.
                # Presumably intended as ==; confirm before changing.
                if eventid == 4672:
                    # Special privileges assigned to new logon.
                    for data in event_data:
                        if data.get("Name") in "SubjectUserName" and data.text != None:
                            username = data.text.split("@")[0]
                            # Trailing "$" marks machine accounts.
                            if username[-1:] not in "$":
                                username = username.lower()
                            else:
                                username = "******"
                    if username not in admins and username != "-":
                        admins.append(username)
                else:
                    # Ordinary logon-family events: pull out source host,
                    # account, domain, SID, logon type, status and auth pkg.
                    for data in event_data:
                        if data.get("Name") in ["IpAddress", "Workstation"] and data.text != None:
                            ipaddress = data.text.split("@")[0]
                            ipaddress = ipaddress.lower().replace("::ffff:", "")
                            ipaddress = ipaddress.replace("\\", "")
                        if data.get("Name") == "WorkstationName" and data.text != None:
                            hostname = data.text.split("@")[0]
                            hostname = hostname.lower().replace("::ffff:", "")
                            hostname = hostname.replace("\\", "")
                        if data.get("Name") in "TargetUserName" and data.text != None:
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower()
                            else:
                                username = "******"
                        if data.get("Name") in "TargetDomainName" and data.text != None:
                            domain = data.text
                        if data.get("Name") in ["TargetUserSid", "TargetSid"] and data.text != None and data.text[0:2] in "S-1":
                            sid = data.text
                        if data.get("Name") in "LogonType":
                            logintype = int(data.text)
                        if data.get("Name") in "Status":
                            status = data.text
                        if data.get("Name") in "AuthenticationPackageName":
                            authname = data.text
                    # Keep only events with a usable user and a non-loopback
                    # source address.
                    if username != "-" and ipaddress != "-" and ipaddress != "::1" and ipaddress != "127.0.0.1":
                        event_series = pd.Series([eventid, ipaddress, username, logintype, status, authname], index=event_set.columns)
                        event_set = event_set.append(event_series, ignore_index = True)
                        # print("%s,%i,%s,%s,%s,%s" % (eventid, ipaddress, username, comment, logintype))
                        count_series = pd.Series([stime.strftime("%Y-%m-%d %H:%M:%S"), eventid, username], index=count_set.columns)
                        count_set = count_set.append(count_series, ignore_index = True)
                        # print("%s,%s" % (stime.strftime("%Y-%m-%d %H:%M:%S"), username))
                        if domain != "-":
                            domain_set.append([username, domain])
                        if username not in username_set:
                            username_set.append(username)
                        if domain not in domains and domain != "-":
                            domains.append(domain)
                        # NOTE(review): `sid not in "-"` / `hostname not in "-"`
                        # are substring tests; presumably != was intended.
                        if sid not in "-":
                            sids[username] = sid
                        if hostname not in "-":
                            hosts[hostname] = ipaddress
    # Span of the observed window in whole hours.
    tohours = int((endtime - starttime).total_seconds() / 3600)
    print("\n[*] Load finished.")
    print("[*] Total Event log is %i."
          % count)
    # Replace workstation names with their resolved IP addresses, then
    # collapse duplicate rows into a single row plus a "count" column.
    event_set = event_set.replace(hosts)
    event_set["count"] = event_set.groupby(["eventid", "ipaddress", "username", "logintype", "status", "authname"])["eventid"].transform("count")
    event_set = event_set.drop_duplicates()
    count_set["count"] = count_set.groupby(["dates", "eventid", "username"])["dates"].transform("count")
    count_set = count_set.drop_duplicates()
    domain_set_uniq = list(map(list, set(map(tuple, domain_set))))

    # Calculate PageRank
    print("[*] Calculate PageRank.")
    ranks = pagerank(event_set)

    # Calculate ChangeFinder
    print("[*] Calculate ChangeFinder.")
    timelines, detects = adetection(count_set, username_set, ranks, starttime, tohours)

    # Create node
    print("[*] Creating a graph data.")
    tx = GRAPH.begin()
    hosts_inv = {v:k for k, v in hosts.items()}
    for ipaddress in event_set["ipaddress"].drop_duplicates():
        if ipaddress in hosts_inv:
            hostname = hosts_inv[ipaddress]
        else:
            hostname = ipaddress
        tx.append(statement_ip, {"IP": ipaddress, "rank": ranks[ipaddress], "hostname": hostname})
    i = 0
    # One user node per username; timelines holds six parallel per-user
    # series (all events plus the five tracked event IDs).
    for username in username_set:
        if username in sids:
            sid = sids[username]
        else:
            sid = "-"
        if username in admins:
            rights = "system"
        else:
            rights = "user"
        tx.append(statement_user, {"user": username, "rank": ranks[username],"rights": rights,"sid": sid,
                                   "counts": ",".join(map(str, timelines[i*6])),
                                   "counts4624": ",".join(map(str, timelines[i*6+1])),
                                   "counts4625": ",".join(map(str, timelines[i*6+2])),
                                   "counts4768": ",".join(map(str, timelines[i*6+3])),
                                   "counts4769": ",".join(map(str, timelines[i*6+4])),
                                   "counts4776": ",".join(map(str, timelines[i*6+5])),
                                   "detect": ",".join(map(str, detects[i]))})
        i += 1
    for domain in domains:
        tx.append(statement_domain, {"domain": domain})
    for _, events in event_set.iterrows():
        tx.append(statement_r, {"user": events["username"], "IP": events["ipaddress"], "id": events["eventid"], "logintype": events["logintype"], "status": events["status"], "count": events["count"], "authname": events["authname"]})
    for username, domain in domain_set_uniq:
        tx.append(statement_dr, {"user": username, "domain": domain})
    tx.append(statement_date, {"Daterange": "Daterange", "start": datetime.datetime(*starttime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S"), "end": datetime.datetime(*endtime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S")})
    tx.process()
    tx.commit()
    print("[*] Creation of a graph data finished.")
def parse_evtx(evtx_list, GRAPH):
    """Parse Windows EVTX files and load logon activity into the given
    Neo4j ``GRAPH`` (oldest variant: plain lists + itertools.groupby).

    evtx_list: list of EVTX file paths.
    GRAPH: an open py2neo Graph connection.

    Relies on module-level state: ``args`` (CLI options), ``EVTX_HEADER``,
    ``EVENT_ID``, the ``statement_*`` Cypher templates, and the helpers
    ``xml_records``, ``get_child``, ``adetection`` and ``pagerank``.
    Exits the process via sys.exit() on malformed input.
    """
    event_set = []       # [eventid, ipaddress, username, logintype, status]
    count_set = []       # [hour-bucket, eventid, username]
    ipaddress_set = []
    username_set = []
    admins = []          # users seen with special privileges (4672)
    sids = {}            # username -> SID
    count = 0
    record_sum = 0
    starttime = None
    endtime = None
    # Validate the UTC offset; it is applied as a plain timedelta below.
    if args.timezone:
        try:
            datetime.timezone(datetime.timedelta(hours=args.timezone))
            tzone = args.timezone
            print("[*] Time zone is %s." % args.timezone)
        except:  # bare except: any invalid offset aborts the run
            sys.exit("[!] Can't load time zone '%s'." % args.timezone)
    else:
        tzone = 0
    if args.fromdate:
        try:
            fdatetime = datetime.datetime.strptime(args.fromdate, "%Y%m%d%H%M%S")
            print("[*] Parse the EVTX from %s." % fdatetime.strftime("%Y-%m-%d %H:%M:%S"))
        except:
            sys.exit("[!] From date does not match format '%Y%m%d%H%M%S'.")
    if args.todate:
        try:
            tdatetime = datetime.datetime.strptime(args.todate, "%Y%m%d%H%M%S")
            # NOTE(review): message says "from" for the --todate bound too;
            # presumably a copy-paste slip — confirm before changing.
            print("[*] Parse the EVTX from %s." % tdatetime.strftime("%Y-%m-%d %H:%M:%S"))
        except:
            sys.exit("[!] To date does not match format '%Y%m%d%H%M%S'.")
    # First pass: estimate the total record count for progress reporting.
    for evtx_file in evtx_list:
        # Sanity-check the 8-byte EVTX magic before parsing.
        fb = open(evtx_file, "rb")
        fb_data = fb.read()[0:8]
        if fb_data != EVTX_HEADER:
            sys.exit("[!] This file is not EVTX format {0}.".format(evtx_file))
        fb.close()
        chunk = -2
        with Evtx(evtx_file) as evtx:
            fh = evtx.get_file_header()
            # Walk chunks from the end until one reports a non-zero
            # last-record number. NOTE(review): unlike later variants there
            # is no try/except here — an all-zero file would raise IndexError.
            while True:
                last_chunk = list(evtx.chunks())[chunk]
                last_record = last_chunk.file_last_record_number()
                chunk -= 1
                if last_record > 0:
                    record_sum = record_sum + last_record
                    break
    # "recode" is a typo in the original message; left as-is (doc-only edit).
    print("[*] Last recode number is %i." % record_sum)

    # Parse Event log
    print("[*] Start parsing the EVTX file.")
    for evtx_file in evtx_list:
        print("[*] Parse the EVTX file %s." % evtx_file)
        for node, err in xml_records(evtx_file):
            count += 1
            if not count % 100:
                sys.stdout.write("\r[*] Now loading %i records."
                                 % count)
                sys.stdout.flush()
            if err is not None:
                # Records that failed XML parsing are skipped silently.
                continue
            sysev = get_child(node, "System")
            if int(get_child(sysev, "EventID").text) in EVENT_ID:
                logtime = get_child(sysev, "TimeCreated").get("SystemTime")
                etime = datetime.datetime.strptime(
                    logtime.split(".")[0],
                    "%Y-%m-%d %H:%M:%S") + datetime.timedelta(hours=tzone)
                if args.fromdate or args.todate:
                    if args.fromdate and fdatetime > etime:
                        continue
                    if args.todate and tdatetime < etime:
                        endtime = datetime.datetime(*etime.timetuple()[:4])
                        break
                # Track the observed range, truncated to the hour
                # (timetuple()[:4]).
                if starttime is None:
                    starttime = datetime.datetime(*etime.timetuple()[:4])
                elif starttime > etime:
                    starttime = datetime.datetime(*etime.timetuple()[:4])
                if endtime is None:
                    endtime = datetime.datetime(*etime.timetuple()[:4])
                elif endtime < etime:
                    endtime = datetime.datetime(*etime.timetuple()[:4])
                event_data = get_child(node, "EventData")
                logintype = "-"
                username = "******"
                ipaddress = "-"
                status = "-"
                sid = "-"
                # NOTE(review): data.get("Name") in "TargetUserName" is a
                # substring test, not equality; presumably intended as ==.
                for data in event_data:
                    if data.get("Name") in ["IpAddress", "Workstation"
                                            ] and data.text != None:
                        ipaddress = data.text.split("@")[0]
                        ipaddress = ipaddress.lower().replace("::ffff:", "")
                        ipaddress = ipaddress.replace("\\", "")
                    if data.get(
                            "Name") in "TargetUserName" and data.text != None:
                        username = data.text.split("@")[0]
                        # Trailing "$" marks machine accounts.
                        if username[-1:] not in "$":
                            username = username.lower()
                        else:
                            username = "******"
                    if data.get("Name") in [
                            "TargetUserSid", "TargetSid"
                    ] and data.text != None and data.text[0:2] in "S-1":
                        sid = data.text
                    if data.get("Name") in "LogonType":
                        logintype = int(data.text)
                    if data.get("Name") in "Status":
                        status = data.text
                # Keep only events with a usable user and a non-loopback
                # source address.
                if username != "-" and ipaddress != "-" and ipaddress != "::1" and ipaddress != "127.0.0.1":
                    event_set.append([
                        int(get_child(sysev, "EventID").text), ipaddress,
                        username, logintype, status
                    ])
                    # print("%s,%i,%s,%s,%s,%s" % (int(get_child(sysev, "EventID").text), ipaddress, username, comment, logintype))
                    count_set.append([
                        datetime.datetime(*etime.timetuple()[:4]).strftime(
                            "%Y-%m-%d %H:%M:%S"),
                        int(get_child(sysev, "EventID").text),
                        username
                    ])
                    # print("%s,%s" % (datetime.datetime(*etime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S"), username))
                    if ipaddress not in ipaddress_set:
                        ipaddress_set.append(ipaddress)
                    if username not in username_set:
                        username_set.append(username)
                    # NOTE(review): `sid not in "-"` is a substring test;
                    # presumably != was intended.
                    if sid not in "-":
                        sids[username] = sid
            if int(get_child(sysev, "EventID").text) == 4672:
                # Special privileges assigned to new logon: collect admins.
                logtime = get_child(sysev, "TimeCreated").get("SystemTime")
                if args.fromdate or args.todate:
                    etime = datetime.datetime.strptime(
                        logtime.split(".")[0],
                        "%Y-%m-%d %H:%M:%S") + datetime.timedelta(hours=tzone)
                    if args.fromdate and fdatetime > etime:
                        continue
                    if args.todate and tdatetime < etime:
                        break
                event_data = get_child(node, "EventData")
                username = "******"
                for data in event_data:
                    if data.get(
                            "Name") in "SubjectUserName" and data.text != None:
                        username = data.text.split("@")[0]
                        if username[-1:] not in "$":
                            username = username.lower()
                        else:
                            username = "******"
                if username not in admins and username != "-":
                    admins.append(username)
    # Span of the observed window in whole hours.
    tohours = int((endtime - starttime).total_seconds() / 3600)
    print("\n[*] Load finished.")
    print("[*] Total Event log is %i."
          % count)
    # Collapse duplicate rows via sort + groupby into (row, count) pairs.
    event_set.sort()
    event_set_uniq = [(g[0], len(list(g[1])))
                      for g in itertools.groupby(event_set)]
    count_set.sort()
    count_set_uniq = [(g[0], len(list(g[1])))
                      for g in itertools.groupby(count_set)]

    # Calculate PageRank
    print("[*] Calculate PageRank.")
    ranks = pagerank(event_set_uniq)

    # Calculate ChangeFinder
    print("[*] Calculate ChangeFinder.")
    timelines, detects = adetection(count_set_uniq, username_set, ranks,
                                    starttime, tohours)

    # Create node
    print("[*] Creating a graph data.")
    tx = GRAPH.begin()
    for ipaddress in ipaddress_set:
        tx.append(statement_ip, {"IP": ipaddress, "rank": ranks[ipaddress]})
    i = 0
    # One user node per username; timelines holds six parallel per-user
    # series (all events plus the five tracked event IDs).
    for username in username_set:
        if username in sids:
            sid = sids[username]
        else:
            sid = "-"
        if username in admins:
            rights = "system"
        else:
            rights = "user"
        tx.append(
            statement_user, {
                "user": username,
                "rank": ranks[username],
                "rights": rights,
                "sid": sid,
                "counts": ",".join(map(str, timelines[i * 6])),
                "counts4624": ",".join(map(str, timelines[i * 6 + 1])),
                "counts4625": ",".join(map(str, timelines[i * 6 + 2])),
                "counts4768": ",".join(map(str, timelines[i * 6 + 3])),
                "counts4769": ",".join(map(str, timelines[i * 6 + 4])),
                "counts4776": ",".join(map(str, timelines[i * 6 + 5])),
                "detect": ",".join(map(str, detects[i]))
            })
        i += 1
    for events, count in event_set_uniq:
        tx.append(
            statement_r, {
                "user": events[2],
                "IP": events[1],
                "id": events[0],
                "logintype": events[3],
                "status": events[4],
                "count": count
            })
    tx.append(
        statement_date, {
            "Daterange":
            "Daterange",
            "start":
            datetime.datetime(
                *starttime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S"),
            "end":
            datetime.datetime(
                *endtime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S")
        })
    tx.process()
    tx.commit()
    print("[*] Creation of a graph data finished.")