def mergeRegistoryInfomation(inventoryapp, inventoryfile):
    """Join application-inventory rows with file-inventory rows on program_id.

    Matching rows are concatenated (app fields + file fields minus its
    program_id) into one ExecutionEntryUpdate1709, with a timezone string
    appended.  Rows left unmatched on either side are padded with empty
    strings in place of the missing side's fields so every produced entry
    carries the full field count.
    """
    merged = []
    matched_apps = set()
    matched_files = set()
    for app in inventoryapp:
        for entry in inventoryfile:
            if app.program_id != entry.program_id:
                continue
            merged.append(ExecutionEntryUpdate1709(
                *(app + entry[1:] + (utility().get_timezone_str(),))))
            matched_apps.add(app)
            matched_files.add(entry)
    # file-only rows: 22 empty strings stand in for the app-side fields
    for entry in set(inventoryfile) - matched_files:
        merged.append(ExecutionEntryUpdate1709(
            *(("",) * 22 + entry[1:] + (utility().get_timezone_str(),))))
    # app-only rows: 17 empty strings stand in for the file-side fields
    for app in set(inventoryapp) - matched_apps:
        merged.append(ExecutionEntryUpdate1709(
            *(app + ("",) * 17 + (utility().get_timezone_str(),))))
    return merged
def parse_windows_timestamp(qword):
    """Convert a Windows FILETIME qword to a timestamp string.

    Returns "" when conversion fails (value out of range, or the OS
    rejects the timestamp) instead of propagating the error.
    """
    try:
        return utility().get_timestamp_str(qword)
    except (ValueError, OSError):
        return ""
def standardOutput(ee, file, pf, header_flag, fields):
    """Write execution entries to pf as fully-quoted, tab-separated UTF-8 rows.

    When header_flag is true, a header row ("Computer Name" plus each
    field's collname) is emitted first.  Each data row starts with the
    computer name derived from *file*, followed by one value per field.
    """
    writer = unicodecsv.writer(pf, delimiter="\t", lineterminator="\n",
                               encoding="utf-8",
                               quoting=unicodecsv.QUOTE_ALL)
    host = utility().get_computer_name(file)
    if header_flag:
        header = ["Computer Name"]
        for field in fields:
            header.append(field.collname)
        writer.writerow(header)
    for entry in ee:
        row = [host]
        for field in fields:
            row.append(getattr(entry, field.name))
        writer.writerow(row)
def standardOutput(ee, args, file, pf, count):
    """Write execution entries to pf as tab-separated UTF-8 rows.

    The header row is written only for the first file (count == 0) and
    only when the user did not pass --noheader.  Columns come from the
    module-level FIELDS definition.
    """
    writer = unicodecsv.writer(pf, delimiter="\t", lineterminator="\n",
                               encoding="utf-8")
    host = utility().get_computer_name(file)
    if count == 0 and not args.noheader:
        writer.writerow(["Computer Name"] +
                        [field.collname for field in FIELDS])
    for entry in ee:
        writer.writerow([host] +
                        [getattr(entry, field.name) for field in FIELDS])
def make_timezone_getter():
    """Return the local timezone string from the shared utility helper.

    NOTE(review): despite the "make...getter" name, this returns the
    timezone string itself, not a getter callable — confirm callers
    expect the string.
    """
    return utility().get_timezone_str()
def parsepf(root, pf, filename, header_version, fileindex):
    """Parse one Windows Prefetch file and append its record to the output CSVs.

    root           -- directory path of the .pf file (used for computer name)
    pf             -- open file object positioned anywhere (seeks absolutely)
    filename       -- .pf file name, cp932-encoded bytes (Python 2 str)
    header_version -- prefetch format version as hex string: "17" (Win7),
                      "1a"/"1e" (Win8/Win10)
    fileindex      -- 0 for the first file processed; controls header output

    Appends one row to out_dir\\prefetch_output.csv and the referenced file
    list to out_dir\\prefetch_output_list.csv.  Relies on module globals
    column_num, column_order, out_dir and args.
    """
    output_csv = []
    output_list = []
    # convert sjis -> utf-8 for output
    filename_cp932 = filename.decode('cp932')
    filename = filename_cp932.encode('utf-8')
    # prefetch filename for output list
    output_list.append(root+"\\"+filename+"\n")
    # search NULL in executable name (UTF-16 terminator within the 60-byte field)
    pf.seek(16)
    i = 0
    while i < 60:
        if utility().hextoint(pf.read(2)) == 0:
            break
        i += 2
    # extract executable name as UTF-16 from offset 16 to NULL
    pf.seek(16)
    exename_hex = binascii.hexlify(pf.read(i))
    exename_uni_str = codecs.decode(exename_hex, 'hex_codec').decode('utf-16')
    exename_utf_str = exename_uni_str.encode('utf-8')
    # list for comparing to exe path
    exename_list = []
    # parse filename list: offset/size pair stored at offset 100
    pf.seek(100)
    filename_list_offset = utility().hextoint(pf.read(4))
    filename_list_size = utility().hextoint(pf.read(4))
    pf.seek(filename_list_offset)
    # loop from "filename_list_offset" to "filename_list_end_offset"
    current_offset = filename_list_offset
    filename_list_end_offset = filename_list_offset + filename_list_size
    filename_length = 0
    while current_offset < filename_list_end_offset:
        # search NULL(boundary) in filename list; each entry is a
        # NUL-terminated UTF-16LE path
        if utility().hextoint(pf.read(2)) == 0:
            pf.seek(current_offset)
            filename_hex = binascii.hexlify(pf.read(filename_length))
            filename_uni_str = codecs.decode(filename_hex, 'hex_codec').decode('utf-16')
            filename_utf_str = filename_uni_str.encode('utf-8')
            # paths containing the exe name are candidate full exe paths
            if exename_utf_str in filename_utf_str:
                exename_list.append(filename_utf_str)
            output_list.append(filename_utf_str+"\n")
            current_offset = current_offset + filename_length + 2
            pf.seek(current_offset)
            filename_length = 0
        else:
            filename_length += 2
    # record_field create
    record_field = [""] * column_num
    # time zone
    record_field[column_order["time_zone"][1]] = \
        utility().get_timezone_str()
    # computer name
    record_field[column_order["computer_name"][1]] = \
        utility().get_computer_name(root)
    # prefetch filename
    record_field[column_order["prefetch_file"][1]] = filename
    # prefetch hash: 4 bytes at offset 76, byte-reversed (little-endian) to hex
    pf.seek(76)
    hash_bin = pf.read(4)
    hash_hex = re.split('(..)', binascii.hexlify(hash_bin))[1::2]
    list.reverse(hash_hex)
    hash_value = "".join(hash_hex)
    record_field[column_order["hash"][1]] = hash_value
    # exe path: first matching path from the filename list
    if not len(exename_list) == 0:
        record_field[column_order["exe_file_path"][1]] = exename_list[0]
    else:
        # fill by file name if file path is nothing
        record_field[column_order["exe_file_path"][1]] = get_exename(filename)
    # run count(Win7): offset 152
    if header_version == "17":
        pf.seek(152)
        record_field[column_order["run_count"][1]] = utility().hextoint(pf.read(4))
    # run count(Win8 or Win10): offset 208
    if header_version == "1a" or header_version == "1e":
        pf.seek(208)
        record_field[column_order["run_count"][1]] = utility().hextoint(pf.read(4))
    # volume information offset and volume count at offset 108
    pf.seek(108)
    vl_info_offset = utility().hextoint(pf.read(4))
    vl_num = utility().hextoint(pf.read(4))
    # seek volume information
    pf.seek(vl_info_offset)
    try:
        vl_devicepath_offset = utility().hextoint(pf.read(4))
    except ValueError:
        # NOTE(review): if this raises, vl_devicepath_offset stays unbound
        # and the seek below will crash with NameError — confirm intended.
        pass
    # volume length (characters)
    pf.seek(vl_info_offset + 4)
    vl_len = utility().hextoint(pf.read(4))
    pf.seek(vl_info_offset + vl_devicepath_offset)
    vol1 = []
    vol2 = []
    # strip NUL bytes from the UTF-16 device path to get an ASCII-ish path
    for a in re.split('(..)', binascii.hexlify(pf.read(vl_len*2)))[1::2]:
        if a != "00":
            vol1.append(a)
    vol2 = binascii.a2b_hex("".join(vol1))
    if vl_num >= 2:
        record_field[column_order["volume_path"][1]] = "Multiple"
        # remove volume path from exe file path with regex match
        record_field[column_order["exe_file_path"][1]] = \
            re.sub(r"\\VOLUME{[0-9a-z_./?-]+\}|\\DEVICE\\HARDDISKVOLUME\d+", \
                "", record_field[column_order["exe_file_path"][1]])
    else:
        record_field[column_order["volume_path"][1]] = vol2
        # remove volume path from exe file path with plain replace
        record_field[column_order["exe_file_path"][1]] = \
            record_field[column_order["exe_file_path"][1]].replace(vol2, "")
    # last run information(Win7): single FILETIME at offset 128
    if header_version == "17":
        pf.seek(128)
        time = utility().get_timestamp_str(utility().hextoint(pf.read(8)))
        record_field[column_order["date_time_1"][1]] = time
    # run time information(Win8 or Win10): up to 8 FILETIMEs from offset 128
    if header_version == "1a" or header_version == "1e":
        lastrun_location = 128
        while lastrun_location < 193:
            pf.seek(lastrun_location)
            try:
                time = utility().get_timestamp_str(
                    utility().hextoint(pf.read(8)))
                # NOTE(review): integer "/" is Python 2 floor division here;
                # under Python 3 this would build a float key like "1.0".
                record_field[column_order["date_time_" + str((lastrun_location-128)/8+1)][1]] = time
            except ValueError:
                # unused slot / invalid timestamp: leave the column empty
                pass
            lastrun_location = lastrun_location + 8
    output_csv.append(record_field)
    # build the header row in column order
    row = [""] * column_num
    for column in column_order.values():
        row[column[1]] = column[0]
    # open/close output csv file
    with open(out_dir+"\\prefetch_output.csv", "a") as output_file:
        output_line = csv.writer((output_file), delimiter="\t", lineterminator="\n")
        if fileindex == 0 and not args.noheader:
            output_line.writerow(row)
        output_line.writerows(output_csv)
    # open/close output list file
    with open(out_dir+"\\prefetch_output_list.csv", "a") as output_list_file:
        output_list.append("\n")
        output_list_file.writelines(output_list)
def parsepf(root, pf, filename, header_version, fileindex):
    """Parse one Windows Prefetch file and write its record via write_output_file.

    root           -- directory path of the .pf file (used for computer name)
    pf             -- open file object positioned anywhere (seeks absolutely)
    filename       -- .pf file name, cp932-encoded bytes (Python 2 str)
    header_version -- prefetch format version as hex string: "17" (Win7),
                      "1a"/"1e" (Win8/Win10)
    fileindex      -- passed through to write_output_file (header control)

    On a truncated file the function writes only the partial file list and
    returns early.  Relies on module globals column_num and
    prefetch_column_order, plus helpers refine_prefetch_list,
    write_output_file and get_exename.
    """
    output_prefetch = []
    output_list = []
    # prefetch_record_field create
    prefetch_record_field = [""] * column_num
    # convert sjis -> utf-8 for output
    filename_cp932 = filename.decode('cp932')
    filename = filename_cp932.encode('utf-8')
    # search NULL in executable name (UTF-16 terminator within the 60-byte field)
    pf.seek(16)
    i = 0
    while i < 60:
        if utility().hextoint(pf.read(2)) == 0:
            break
        i += 2
    # extract executable name as UTF-16 from offset 16 to NULL
    pf.seek(16)
    exename_hex = binascii.hexlify(pf.read(i))
    exename_uni_str = codecs.decode(exename_hex, 'hex_codec').decode('utf-16')
    exename_utf_str = exename_uni_str.encode('utf-8')
    # list for comparing to exe path
    exename_list = []
    # parse filename list: offset/size pair stored at offset 100
    pf.seek(100)
    filename_list_offset = utility().hextoint(pf.read(4))
    filename_list_size = utility().hextoint(pf.read(4))
    pf.seek(filename_list_offset)
    # loop from "filename_list_offset" to "filename_list_end_offset"
    current_offset = filename_list_offset
    filename_list_end_offset = filename_list_offset + filename_list_size
    filename_length = 0
    while current_offset < filename_list_end_offset:
        # search NULL(boundary) in filename list; each entry is a
        # NUL-terminated UTF-16LE path
        read = pf.read(2)
        if len(read) == 0:
            # Unexpected end of prefetch file. This file is damaged in the
            # middle: salvage what was collected so far and stop.
            prefetch_file_list = refine_prefetch_list(
                utility().get_computer_name(root), filename, output_list)
            write_output_file("prefetch_list_output.csv", prefetch_file_list, fileindex)
            return
        else:
            if utility().hextoint(read) == 0:
                pf.seek(current_offset)
                filename_hex = binascii.hexlify(pf.read(filename_length))
                filename_uni_str = codecs.decode(filename_hex, 'hex_codec').decode('utf-16')
                filename_utf_str = filename_uni_str.encode('utf-8')
                # paths containing the exe name are candidate full exe paths
                if exename_utf_str in filename_utf_str:
                    exename_list.append(filename_utf_str)
                output_list.append(filename_utf_str)
                current_offset = current_offset + filename_length + 2
                pf.seek(current_offset)
                filename_length = 0
            else:
                filename_length += 2
    # time zone
    prefetch_record_field[prefetch_column_order["time_zone"][1]] = \
        utility().get_timezone_str()
    # computer name
    prefetch_record_field[prefetch_column_order["computer_name"][1]] = \
        utility().get_computer_name(root)
    # prefetch filename
    prefetch_record_field[prefetch_column_order["prefetch_file"][1]] = filename
    # prefetch hash: 4 bytes at offset 76, byte-reversed (little-endian) to hex
    pf.seek(76)
    hash_bin = pf.read(4)
    hash_hex = re.split('(..)', binascii.hexlify(hash_bin))[1::2]
    list.reverse(hash_hex)
    hash_value = "".join(hash_hex)
    prefetch_record_field[prefetch_column_order["hash"][1]] = hash_value
    # exe path: first matching path from the filename list
    if not len(exename_list) == 0:
        prefetch_record_field[prefetch_column_order["exe_file_path"]
                              [1]] = exename_list[0]
    else:
        # fill by file name if file path is nothing
        prefetch_record_field[prefetch_column_order["exe_file_path"]
                              [1]] = get_exename(filename)
    # run count(Win7): offset 152
    if header_version == "17":
        pf.seek(152)
        prefetch_record_field[prefetch_column_order["run_count"]
                              [1]] = utility().hextoint(pf.read(4))
    # run count(Win8 or Win10)
    if header_version == "1a" or header_version == "1e":
        # format 224 or 216: try offset 208 first, fall back to 200 when zero
        pf.seek(208)
        prefetch_record_field[prefetch_column_order["run_count"]
                              [1]] = utility().hextoint(pf.read(4))
        if prefetch_record_field[prefetch_column_order["run_count"][1]] == 0:
            pf.seek(200)
            prefetch_record_field[prefetch_column_order["run_count"]
                                  [1]] = utility().hextoint(pf.read(4))
    # volume information offset and volume count at offset 108
    pf.seek(108)
    vl_info_offset = utility().hextoint(pf.read(4))
    vl_num = utility().hextoint(pf.read(4))
    # seek volume information
    pf.seek(vl_info_offset)
    try:
        vl_devicepath_offset = utility().hextoint(pf.read(4))
    except ValueError:
        # NOTE(review): if this raises, vl_devicepath_offset stays unbound
        # and the seek below will crash with NameError — confirm intended.
        pass
    # volume length (characters)
    pf.seek(vl_info_offset + 4)
    vl_len = utility().hextoint(pf.read(4))
    pf.seek(vl_info_offset + vl_devicepath_offset)
    vol1 = []
    vol2 = []
    # strip NUL bytes from the UTF-16 device path to get an ASCII-ish path
    for a in re.split('(..)', binascii.hexlify(pf.read(vl_len * 2)))[1::2]:
        if a != "00":
            vol1.append(a)
    vol2 = binascii.a2b_hex("".join(vol1))
    if vl_num >= 2:
        prefetch_record_field[prefetch_column_order["volume_path"]
                              [1]] = "Multiple"
        # remove volume path from exe file path with regex match
        prefetch_record_field[prefetch_column_order["exe_file_path"][1]] = \
            re.sub(r"\\VOLUME{[0-9a-z_./?-]+\}|\\DEVICE\\HARDDISKVOLUME\d+", \
                "", prefetch_record_field[prefetch_column_order["exe_file_path"][1]])
    else:
        prefetch_record_field[prefetch_column_order["volume_path"][1]] = vol2
        # remove volume path from exe file path with plain replace
        prefetch_record_field[prefetch_column_order["exe_file_path"][1]] = \
            prefetch_record_field[prefetch_column_order["exe_file_path"][1]].replace(vol2, "")
    # last run information(Win7): single FILETIME at offset 128
    if header_version == "17":
        pf.seek(128)
        time = utility().get_timestamp_str(utility().hextoint(pf.read(8)))
        prefetch_record_field[prefetch_column_order["date_time_1"][1]] = time
    # run time information(Win8 or Win10): up to 8 FILETIMEs from offset 128
    if header_version == "1a" or header_version == "1e":
        lastrun_location = 128
        while lastrun_location < 193:
            pf.seek(lastrun_location)
            try:
                time = utility().get_timestamp_str(utility().hextoint(
                    pf.read(8)))
                # NOTE(review): integer "/" is Python 2 floor division here;
                # under Python 3 this would build a float key like "1.0".
                prefetch_record_field[prefetch_column_order["date_time_" + str(
                    (lastrun_location - 128) / 8 + 1)][1]] = time
            except ValueError:
                # unused slot / invalid timestamp: leave the column empty
                pass
            lastrun_location = lastrun_location + 8
    output_prefetch.append(prefetch_record_field)
    prefetch_file_list = refine_prefetch_list(
        utility().get_computer_name(root), filename, output_list)
    # open/close output csv file
    write_output_file("prefetch_output.csv", output_prefetch, fileindex)
    write_output_file("prefetch_list_output.csv", prefetch_file_list, fileindex)
def parseusnjrnl(journal_filesize, start_point, pathname, journal_file, output_file):
    """Walk $UsnJrnl:$J USN records from start_point and write them as TSV rows.

    journal_filesize -- total size of the journal file (loop end condition)
    start_point      -- byte offset of the first USN record to parse
    pathname         -- path used to derive the computer name column
    journal_file     -- open journal file object (seeks absolutely)
    output_file      -- open output file; closed here when EOF is reached

    Relies on the module global time_delta (timezone string) and the
    reasonflag() helper.  Record layout offsets follow USN_RECORD_V2:
    +0 size, +8 file ref, +16 parent ref, +32 timestamp, +40 reason,
    +56 name length, +60 name.
    """
    computer_name = utility().get_computer_name(pathname)
    while True:
        record_field = []
        # entry size (little-endian dword, reversed byte-wise from hex)
        journal_file.seek(int(start_point))
        size_hex = re.split('(..)', binascii.hexlify(journal_file.read(4)))[1::2]
        list.reverse(size_hex)
        entry_size = int(("".join(size_hex)), 16)
        # file ID (low 4 bytes of the 8-byte file reference)
        journal_file.seek(int(start_point + 8))
        file_id = struct.unpack("<L", journal_file.read(4))[0]
        # parent folder ID (low 4 bytes of the parent reference)
        journal_file.seek(int(start_point + 16))
        parent_file_id = struct.unpack("<L", journal_file.read(4))[0]
        # TODO handle 8byte fileID/parentID exactly
        # file_id = struct.unpack("<Q", journal_file.read(8))[0]
        # timestamp (FILETIME qword at +32)
        journal_file.seek(int(start_point + 32))
        ts = struct.unpack("<Q", journal_file.read(8))[0]
        ts_s = utility().get_timestamp_str(ts)
        # reason flag — reasonflag() reads at the file's current position
        # journal_file.seek(int(start_point + 40))
        rhex, rflag = reasonflag(journal_file)
        # file name: UTF-16LE, length at +56, data from +60
        journal_file.seek(int(start_point + 56))
        filename_size = struct.unpack("<H", journal_file.read(2))[0]
        journal_file.seek(int(start_point + 60))
        filename = journal_file.read(filename_size)
        try:
            filename = filename.decode('UTF-16LE').encode('UTF-8')
        except UnicodeDecodeError:
            # undecodable name: dump raw bytes and keep them as-is
            print repr(filename)
        # write record field with appropriate order
        record_field.append(computer_name)
        record_field.append(ts_s)
        record_field.append(time_delta)
        record_field.append(filename)
        record_field.append(rhex)
        record_field.append(rflag)
        record_field.append(file_id)
        record_field.append(parent_file_id)
        csv.writer((output_file), delimiter="\t",
                   lineterminator="\n", quoting=csv.QUOTE_ALL).writerow(record_field)
        # search next startpoint or exit at EOF
        next_start = start_point + entry_size
        if next_start >= journal_filesize:
            output_file.close()
            return
        journal_file.seek(next_start)
        # skip zero padding until the next non-zero dword (next record start)
        while True:
            if binascii.hexlify(journal_file.read(4)) != "00000000":
                start_point = journal_file.tell() - 4
                break
# NOTE(review): this chunk begins mid-way through a parseusnjrnl()-style
# function; the lines below continue a csv.writer(...) call whose opening
# is outside this view.  Preserved verbatim apart from comments.
        csv.writer((output_file), delimiter="\t",
                   lineterminator="\n", quoting=csv.QUOTE_ALL).writerow(record_field)
        # search next startpoint or exit at EOF
        next_start = start_point + entry_size
        if next_start >= journal_filesize:
            output_file.close()
            return
        journal_file.seek(next_start)
        # skip zero padding until the next non-zero dword (next record start)
        while True:
            if binascii.hexlify(journal_file.read(4)) != "00000000":
                start_point = journal_file.tell() - 4
                break


if __name__ == '__main__':
    # Scan in_dir for $UsnJrnl-$J files and parse each one; exit with an
    # error when none are found.
    exists_flag = False
    for root, dirs, files in os.walk(in_dir):
        for filename in files:
            if not re.search(r'\$UsnJrnl-\$J', filename):
                continue
            exists_flag = True
            journal_pathname = os.path.join(root, filename)
            journal_filesize = os.path.getsize(journal_pathname)
            # time_delta is read as a global by the record writer
            time_delta = utility().get_timezone_str()
            check_start_point(journal_pathname)
            print "Saved: %s\\%s_output.csv" % (out_dir, filename)
    if not exists_flag:
        print "$UsnJrnl-$J not found"
        sys.exit(1)