except Exception as e: print "Failed to process message: {} Exception:".format(email_id, e) print traceback.format_exc() raise e return row if __name__ == "__main__": desc = ''' examples: ./this.py email.eml ''' parser = argparse.ArgumentParser( description=" ... ", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=desc) #parser.add_argument("target_email", help="target email") #parser.add_argument("outdir", help="Out Dir") parser.add_argument("file_path", help="File Path") #parser.add_argument("infile", nargs='?', type=argparse.FileType('r'), default=sys.stdin, help="Input File") args = parser.parse_args() guid = md5(args.file_path) category = categoryList(args.file_path) buff_msg = slurp(args.file_path) message = email.message_from_string(buff_msg) row = extract(guid, message, category) print row
parser.add_argument("--suffix", default="_mime.txt", help="file suffix mask which will be used to designate eml files, default=_mime.txt") #parser.add_argument("infile", nargs='?', type=argparse.FileType('r'), default=sys.stdin, help="Input File") args = parser.parse_args() print "ARGS: %s"%str(args) emls_path = os.path.abspath(args.eml_root_path) count_failures = 0 with RollingFile(args.out_dir, "part", args.limit) as outfile: for i, eml_file in enumerate(eml_files(emls_path, args.suffix)): guid = str(uuid.uuid1()) try: categories = email_extract_json_unicode.categoryList(os.path.split(eml_file)[0].replace(emls_path, "", 1)) message = email.message_from_string(slurp(eml_file)) row = email_extract_json_unicode.extract(guid, message, categories, preserve_attachments=args.preserve_attachments) row["ingest_id"] = args.ingest_id row["case_id"] = args.case_id row["alt_ref_id"] = args.alt_ref_id row["label"] = args.label row["original_artifact"] = {"filename" : eml_file, "type" : "eml"} outfile.write(json.dumps(row) + "\n") except Exception as e: count_failures += 1 traceback.print_exc() print "FAILED to process eml_file {}. Exception line: {} | {} ".format(eml_file, i, e.message) if i % 1000 == 0: prn("completed line: {}".format(i))
# writes raw message to txt file #spit("{}/{}.txt".format(_dir, email_id), msg) row= createRow(email_id, message, attach, msg, categories) return row if __name__ == "__main__": desc = ''' examples: ./this.py email.eml ''' parser = argparse.ArgumentParser( description=" ... ", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=desc) #parser.add_argument("target_email", help="target email") #parser.add_argument("outdir", help="Out Dir") parser.add_argument("file_path", help="File Path") #parser.add_argument("infile", nargs='?', type=argparse.FileType('r'), default=sys.stdin, help="Input File") args = parser.parse_args() guid = md5(args.file_path) category = categoryList(args.file_path) buff_msg = slurp(args.file_path) message = email.message_from_string(buff_msg) row = extract(guid, message, category) print row
) #parser.add_argument("infile", nargs='?', type=argparse.FileType('r'), default=sys.stdin, help="Input File") args = parser.parse_args() print "ARGS: %s" % str(args) emls_path = os.path.abspath(args.eml_root_path) count_failures = 0 with RollingFile(args.out_dir, "part", args.limit) as outfile: for i, eml_file in enumerate(eml_files(emls_path, args.suffix)): guid = str(uuid.uuid1()) try: categories = email_extract_json_unicode.categoryList( os.path.split(eml_file)[0].replace(emls_path, "", 1)) message = email.message_from_string(slurp(eml_file)) row = email_extract_json_unicode.extract( guid, message, categories, preserve_attachments=args.preserve_attachments) row["ingest_id"] = args.ingest_id row["case_id"] = args.case_id row["alt_ref_id"] = args.alt_ref_id row["label"] = args.label row["original_artifact"] = { "filename": eml_file, "type": "eml" } outfile.write(json.dumps(row) + "\n") except Exception as e: