def test_open(self): """Tests the open function.""" if not unittest.source: raise unittest.SkipTest("missing source") pff_file = pypff.open(unittest.source) self.assertIsNotNone(pff_file) pff_file.close() with self.assertRaises(TypeError): pypff.open(None) with self.assertRaises(ValueError): pypff.open(unittest.source, mode="w")
def main(pst_file, report_name):
    """
    Open a PST, traverse its folder structure and generate the reports.

    :param pst_file: A string representing the path to the PST file to analyze
    :param report_name: Name of the report title (if supplied by the user)
    :return: None
    """
    logger.debug("Opening PST for processing...")
    # Only the final path component is passed on to the HTML report.
    pst_name = os.path.basename(pst_file)
    # Keep the file object bound to a local for the whole run.
    pst_handle = pypff.open(pst_file)
    root_folder = pst_handle.get_root_folder()

    logger.debug("Starting traverse of PST structure...")
    folder_traverse(root_folder)

    logger.debug("Generating Reports...")
    word_ranking = word_stats()
    sender_ranking = sender_report()
    date_report()
    html_report(report_name, pst_name, word_ranking, sender_ranking)
def main(pst_file, output_dir):
    """Open a PST file, traverse it from its root folder and print progress.

    :param pst_file: Path of the PST file handed to pypff.open
    :param output_dir: Accepted but not used inside this function
    :return: None
    """
    opening_banner = "[+] Accessing {} PST file".format(pst_file)
    print(opening_banner)
    pst_handle = pypff.open(pst_file)
    root_folder = pst_handle.get_root_folder()

    print("[+] Traversing PST folder structure")
    recursePST(root_folder)

    # `messages` is a module-level name; presumably a counter filled in
    # during traversal -- confirm against recursePST.
    summary = "[+] Identified {} messages..".format(messages)
    print(summary)
def parse_pst(file_path: str) -> List[Dict]:
    """Parse a .pst file and return what recurse_pst collects from its root folder.

    Any exception raised while opening or walking the file is logged and an
    empty list is returned instead of propagating the error.

    :param file_path: Path of the .pst file to open
    :return: The result of recurse_pst, or [] on failure
    """
    try:
        pst_handle = pypff.open(file_path)
        records = recurse_pst(pst_handle.get_root_folder())
    except Exception as e:
        logger.error(f"Failed to parse .pst file: {file_path}. Exception: {e}")
        return []
    return records
def main(pst_file):
    """
    Open a PST, collect message data from its folder tree and return it
    together with the list of column names.

    :param pst_file: A string representing the path to the PST file to analyze
    :return: Tuple of (value returned by folder_traverse, header list)
    """
    header = ['pst_name', 'folder_name', 'creation_time', 'submit_time',
              'delivery_time', 'sender', 'subject', 'attachment_count']

    pst_handle = pypff.open(pst_file)
    root_folder = pst_handle.get_root_folder()

    # The traversal starts with an empty accumulator list and is told which
    # PST it is in and that it begins at the folder labelled 'root'.
    traverse_context = {'pst_name': pst_file, 'folder_name': 'root'}
    message_data = folder_traverse(root_folder, [], **traverse_context)
    return message_data, header
def main(pst_file, output_dir, ig, threshold, links):
    """Scan a PST file, print summary counters and write the results to disk.

    The counters and collections printed and written here (`messages`,
    `compared_messages`, `ignored_messages`, `no_body_messages`,
    `suspicious_messages`, `message_list`, `urls_dict`, `threaturls`) are
    module-level names; presumably they are filled in by the helper calls
    below -- confirm against those helpers.

    :param pst_file: Path of the PST file to open
    :param output_dir: Directory that receives the CSV/TXT output
    :param ig: Comma-separated string of entries to ignore, or None
    :param threshold: Limit passed to senderThreshold
    :param links: Limit passed to linkThreshold
    :return: None
    """
    global message_list

    print("[+] Accessing {} PST file..".format(pst_file))
    pst = pypff.open(pst_file)
    root = pst.get_root_folder()

    print("[+] Traversing PST folder structure..")
    # Normalise the ignore entries so comparisons can be case-insensitive.
    if ig is not None:
        ignore = [x.strip().lower() for x in ig.split(',')]
    else:
        ignore = []
    recursePST(root, ignore)

    print("[+] Identified {} messages..".format(messages))
    print(
        "[+] Compared {} messages. Messages not compared were missing a FROM header or both Reply-To and Return-Path"
        .format(compared_messages))
    print("[+] Ignored {} comparable messages".format(ignored_messages))
    print("[+] {} Messages without bodies to check for links".format(
        no_body_messages))
    print(
        "[+] Identified {} suspicious messages..".format(suspicious_messages))

    print("[+] Identifying emails complying with sender threshold limit of {}".
          format(threshold))
    senderThreshold(threshold)
    print("[+] Identifying emails complying with link threshold limit of {}".
          format(links))
    linkThreshold(links)

    headers = [
        "Folder", "Subject", "Sender", "Attachments", "From Email",
        "Return-Path", "Reply-To", "Flag"
    ]
    print("[+] Writing {} results to CSV in {}".format(len(message_list),
                                                       output_dir))
    csv_writer(message_list, headers, output_dir)

    if lookupURLS:
        print("[+] Writing {} results to TXT in {}".format(
            len(urls_dict), output_dir))
        url_report = os.path.join(output_dir, 'Go_Phish_Unique_URLS_Found.txt')
        # The with-block closes the file itself, so no explicit close() is
        # needed; iterating the dict yields its keys directly.
        with open(url_report, 'w') as f:
            for url in urls_dict:
                if "http" in url:
                    f.write("{}\n".format(url))
        lookupURLs(urls_dict.keys())
        writeThreatURLsToCSV(threaturls, output_dir, "Go-Phish-Threat-URLS")
def main(pst_file, report_name):
    """
    Open a PST, traverse its folder structure and generate the reports.

    :param pst_file: A string representing the path to the PST file to analyze
    :param report_name: Name of the report title (if supplied by the user)
    :return: None
    """
    logging.debug("Opening PST for processing...")
    # Only the final path component is passed on to the HTML report.
    pst_name = os.path.basename(pst_file)
    # Keep the file object bound to a local for the whole run.
    pst_handle = pypff.open(pst_file)
    root_folder = pst_handle.get_root_folder()

    logging.debug("Starting traverse of PST structure...")
    folderTraverse(root_folder)

    logging.debug("Generating Reports...")
    word_ranking = wordStats()
    sender_ranking = senderReport()
    dateReport()
    HTMLReport(report_name, pst_name, word_ranking, sender_ranking)
def main(pst_file):
    """
    Open a PST, gather message data by traversing from the root folder, and
    return that data with the matching column names.

    :param pst_file: A string representing the path to the PST file to analyze
    :return: Tuple of (value returned by folder_traverse, header list)
    """
    pst_handle = pypff.open(pst_file)
    root_folder = pst_handle.get_root_folder()

    # Start from an empty accumulator; the keyword arguments identify the PST
    # and label the starting folder as 'root'.
    message_data = folder_traverse(
        root_folder,
        [],
        pst_name=pst_file,
        folder_name='root',
    )

    header = [
        'pst_name',
        'folder_name',
        'creation_time',
        'submit_time',
        'delivery_time',
        'sender',
        'subject',
        'attachment_count',
    ]
    return message_data, header
def main(pst_file, output_dir, ig, threshold, links):
    """Scan a PST file, print summary counters and write the results as CSV.

    The counters printed here (`messages`, `compared_messages`,
    `ignored_messages`, `no_body_messages`, `suspicious_messages`) and
    `message_list` are module-level names; presumably they are filled in by
    the helper calls below -- confirm against those helpers.

    :param pst_file: Path of the PST file to open
    :param output_dir: Directory handed to csv_writer
    :param ig: Comma-separated string of entries to ignore, or None
    :param threshold: Limit passed to senderThreshold
    :param links: Limit passed to linkThreshold
    :return: None
    """
    global message_list

    print("[+] Accessing {} PST file..".format(pst_file))
    pst_handle = pypff.open(pst_file)
    root_folder = pst_handle.get_root_folder()

    print("[+] Traversing PST folder structure..")
    # Entries are trimmed and lower-cased; no filter string means nothing is ignored.
    ignore = [] if ig is None else [x.strip().lower() for x in ig.split(',')]
    recursePST(root_folder, ignore)

    print("[+] Identified {} messages..".format(messages))
    compared_line = "[+] Compared {} messages. Messages not compared were missing a FROM header or both Reply-To and Return-Path".format(compared_messages)
    print(compared_line)
    print("[+] Ignored {} comparable messages".format(ignored_messages))
    print("[+] {} Messages without bodies to check for links".format(no_body_messages))
    print("[+] Identified {} suspicious messages..".format(suspicious_messages))

    print("[+] Identifying emails complying with sender threshold limit of {}".format(threshold))
    senderThreshold(threshold)
    print("[+] Identifying emails complying with link threshold limit of {}".format(links))
    linkThreshold(links)

    headers = [
        "Folder",
        "Subject",
        "Sender",
        "Attachments",
        "From Email",
        "Return-Path",
        "Reply-To",
        "Flag",
    ]
    result_count = len(message_list)
    print("[+] Writing {} results to CSV in {}".format(result_count, output_dir))
    csv_writer(message_list, headers, output_dir)
def show_folders(folder, depth=0): """ Shows or prints all folder names recursively. Args: folder: The top level folder. depth: The recursion depth. """ if folder.name != None: print("%s%s" % ((" " * depth), folder.name.encode("utf-8"))) for item in folder.sub_folders: show_folders(item, depth + 1) if __name__ == "__main__": """ magic main. """ pff_file = pypff.open("backup.pst") show_folders(pff_file.root_folder) try: root = pff_file.root_folder rows = { "mails.inbox.csv": [], "mails.sent.csv": [], "mails.what.csv": [] } traverse_folder(rows, root, "") for file, entries in rows.items(): with open(os.path.join("", file), 'wb') as wp:
def folderReport(message_list, folder_name, filename):
    """Write the messages of one folder to a CSV-formatted report file.

    The output path comes from makePath, called with
    "folder_report_<filename>_<folder_name>.txt". Only the 'subject',
    'sender', 'header' and 'body' keys of each message dict are written;
    any other keys are ignored.

    :param message_list: Iterable of dicts, one per message
    :param folder_name: Folder name used in the report file name
    :param filename: PST-derived name used in the report file name
    :return: None
    """
    #if not len(message_list):
    #    logging.warning("Empty message not processed")
    #    return
    fout_path = makePath("folder_report_" + filename + "_" + folder_name + ".txt")
    header = ['subject', 'sender', 'header', 'body']
    # The with-block closes the report even if writing a row raises.
    # 'wb' is kept as in the original (the Python 2 csv convention).
    with open(fout_path, 'wb') as fout:
        csv_fout = csv.DictWriter(fout, fieldnames=header, extrasaction='ignore')
        csv_fout.writeheader()
        csv_fout.writerows(message_list)


#fn = os.path.join(os.path.dirname(__file__), '/Data/EnronDataOrg_AED_Mailbox-PSTs_20090122/*.pst')
directory = 'Data/Enron_txt'
if not os.path.exists(directory):
    os.makedirs(directory)

pst_list = glob.glob("Data/Enron/*.pst")
for pst_file in pst_list:
    # File name without directory, cut at its first dot.
    pst_name = os.path.split(pst_file)[1].split('.')[0]
    # Call form prints the same text and parses on Python 2 and 3.
    print(pst_name)
    opst = pypff.open(pst_file)
    root = opst.get_root_folder()
    folderTraverse(root, pst_name)