def main():
    """Tally IPv4-address "votes" from bulk_extractor output into a SQLite db.

    Reads IPv4 hits from the bulk_extractor directory named by the global
    ``args.be_dir``, pairs addresses that share a forensic path (a packet's
    src/dst pair), annotates each with file-system ownership from the
    annotation database and with MaxMind geolocation data, and inserts one
    row per address into the ``ipv4s_votes`` table of ``ipv4s_votes.db``.

    Relies on module-level globals: ``args``, ``SQL_CREATE_IPV4S_VOTES``,
    and the ``geoproc_library``/``geoproc_cfg``/``dfxml`` helpers.
    """
    global args

    #Connect to anno db if available
    annoconn, annocur = geoproc_library.connect_to_fs_anno_db(args.anno)

    #Connect to the MaxMind reference db.
    #NOTE(review): mysql.connector raises on failure rather than returning
    #None, so errors surface as exceptions from Connect() itself.
    cfg = geoproc_cfg.config
    refconn = mysql.connector.Connect(
      host=cfg.get("mysql", "maxmind_server"),
      user=cfg.get("mysql", "maxmind_read_username"),
      password=geoproc_cfg.db_password("maxmind_read_password_file"),
      db=cfg.get("mysql", "maxmind_schema"),
      use_unicode=True
    )
    refcur = refconn.cursor(cursor_class=geoproc_cfg.MySQLCursorDict)

    #Set up output database
    outconn = sqlite3.connect("ipv4s_votes.db")
    outconn.isolation_level = "EXCLUSIVE"
    outconn.row_factory = sqlite3.Row
    outcur = outconn.cursor()
    outcur.execute(SQL_CREATE_IPV4S_VOTES)

    #Group the bulk_extractor hits by forensic path, and collect the set of
    #distinct addresses for a single batched location lookup.
    pairing_dict = collections.defaultdict(list)
    ip_set = set()
    for (forensic_path, ipv4, ipv4_notes) in geoproc_library.bulk_extractor_ips(args.be_dir):
        pairing_dict[forensic_path].append((ipv4, ipv4_notes))
        ip_set.add(ipv4)

    #Unfortunately, there isn't much to do for timestamps without file system or network time information.
    #TODO Add time interface
    dummy_dftime = dfxml.dftime("2009-05-01T00:00:00Z")

    ips_to_locs = geoproc_library.ips_to_locations(refcur, None, ip_set)

    for forensic_path in pairing_dict:
        #Determine if we have a pair: exactly two addresses at one forensic
        #path are presumed to be a packet's source/destination pair.
        entries_at_path = pairing_dict[forensic_path]
        pair_found = len(entries_at_path) == 2
        for (ipv4, ipv4_notes) in entries_at_path:
            outdict = dict()
            outdict["believed_timestamp"] = dummy_dftime.iso8601()
            outdict["forensic_path"] = forensic_path
            outdict["ipv4"] = ipv4
            outdict["ipv4_notes"] = ipv4_notes
            #Checksum state stays None (unknown) unless a note decides it.
            if "cksum-bad" in ipv4_notes:
                outdict["cksum_ok"] = False
            elif "cksum-ok" in ipv4_notes:
                outdict["cksum_ok"] = True
            #None, otherwise
            outdict["is_socket_address"] = "sockaddr" in ipv4_notes
            outdict["pair_found"] = pair_found
            if "(src)" in ipv4_notes:
                outdict["src_or_dst"] = "src"
            elif "dst" in ipv4_notes:
                outdict["src_or_dst"] = "dst"
            #None, otherwise

            #Attach file-system ownership of this forensic path, if known.
            annorecs = geoproc_library.forensic_path_to_anno_recs(annocur, outdict["forensic_path"])
            if annorecs and len(annorecs) > 1:
                sys.stderr.write("Warning: Multiple files found to own forensic path %r. Only using first. This may cause strange results.\n" % outdict["forensic_path"])
            if annorecs and len(annorecs) > 0:
                annorec = annorecs[0]
                outdict["obj_id"] = annorec.get("obj_id")
                outdict["fs_obj_id"] = annorec.get("fs_obj_id")
                outdict["fiwalk_id"] = annorec.get("fiwalk_id")

            #Attach MaxMind location fields when the lookup had a hit.
            if ipv4 in ips_to_locs:
                for key in [
                  "maxmind_ipv4_time",
                  "country",
                  "region",
                  "city",
                  "postalCode",
                  "latitude",
                  "longitude"
                ]:
                    outdict[key] = ips_to_locs[ipv4][key]
            geoproc_library.insert_db(outcur, "ipv4s_votes", outdict)
    outconn.commit()
    #Release the output database handle now that all rows are committed.
    outcur.close()
    outconn.close()
def main():
    """Score email "Received" headers into the email_files_votes database.

    Walks the messages listed in ``<args.emaildir>/manifest.txt``, extracts
    IPv4 addresses and an RFC 822 timestamp from each Received header,
    geolocates the addresses against the MaxMind database (when reachable),
    folds in bulk_extractor IP notes (when supplied), and writes one row per
    (message, header, IP) into ``<args.outdir>/email_files_votes.db``.

    Relies on module-level globals: ``args``, ``SQL_CREATE_EMAIL_FILES_VOTES``,
    ``dprint``, ``emails_in_dir_manifest``, and the ``geoproc_library``/
    ``geoproc_cfg``/``dfxml`` helpers.

    Raises:
        Exception: if the input manifest is missing or the output database
            already exists (this script refuses to overwrite).
    """
    global args

    #Set up lookup database connection.  This is deliberately best-effort:
    #any failure (missing driver, bad credentials, unreachable server) leaves
    #lookupcur as None and the script proceeds without geolocation support.
    cfg = geoproc_cfg.config
    lookupconn = None
    lookupcur = None
    try:
        import mysql.connector as mdb
        lookupconn = mdb.connect(
          host=cfg.get("mysql", "maxmind_server"),
          user=cfg.get("mysql", "maxmind_read_username"),
          password=geoproc_cfg.db_password("maxmind_read_password_file"),
          db=cfg.get("mysql", "maxmind_schema"),
          use_unicode=True
        )
        lookupcur = lookupconn.cursor(cursor_class=geoproc_cfg.MySQLCursorDict)
    except Exception:
        sys.stderr.write("Warning: Could not connect to database. Proceeding without database support.\n")

    #Connect to annodb
    annoconn, annocur = geoproc_library.connect_to_fs_anno_db(args.annodb)

    #Verify input
    manifest_path = os.path.join(args.emaildir, "manifest.txt")
    if not os.path.isfile(manifest_path):
        raise Exception("Error: manifest.txt not found in input directory.")

    #Ingest BE ips, if available
    #Stash in (once-tested) histogram.
    #Dictionary key: ipv4 address
    #Dictionary value: (notes, tally) default dictionary.
    ip_notes_histogram = collections.defaultdict(lambda: collections.defaultdict(int))
    if args.bulk_extractor_output:
        for (forensic_path, ipv4, ipv4_notes) in geoproc_library.bulk_extractor_ips(args.bulk_extractor_output):
            ip_notes_histogram[ipv4][ipv4_notes] += 1
    dprint("Debug: Number of IPv4s with notes: %d." % len(ip_notes_histogram))

    #Set up output database
    outdbpath = os.path.join(args.outdir, "email_files_votes.db")
    if os.path.isfile(outdbpath):
        raise Exception("Error: Output database already exists. This script won't overwrite. Aborting.")
    outconn = sqlite3.connect(outdbpath)
    outconn.isolation_level = "EXCLUSIVE"
    outconn.row_factory = sqlite3.Row
    outcur = outconn.cursor()
    outcur.execute(SQL_CREATE_EMAIL_FILES_VOTES)

    for (fiwalk_id, messageno, message) in emails_in_dir_manifest(manifest_path):
        dprint("Debug: Analyzing a record from fiwalk_id %r." % fiwalk_id)
        received_recs = message.get_all("Received")
        if not received_recs:
            continue
        pathlength = len(received_recs)
        for (pathindex, pathline) in enumerate(received_recs):
            #TODO Just getting all the IPs for now; filter later
            ips = geoproc_library.all_ipv4s(pathline)
            dprint("Debug: Found this many IP's: %d.\n\t%r" % (len(ips), ips))

            #Can we get a date?
            maybe_timestamp = None
            maybe_timestamp_match = dfxml.rx_rfc822datetime.search(pathline)
            if maybe_timestamp_match:
                thestring = maybe_timestamp_match.string
                thespan = maybe_timestamp_match.span()
                thedatestring = thestring[thespan[0]:thespan[1]]
                try:
                    maybe_timestamp = dfxml.dftime(thedatestring)
                except Exception:
                    sys.stderr.write("Warning: An error occured trying to parse time input.\nInput:%r\nStack trace:\n" % thedatestring)
                    sys.stderr.write(traceback.format_exc())
                    sys.stderr.write("\n")
                    #Don't stop here.
            dprint("Debug: Believed timestamp: %r." % maybe_timestamp)

            #Now that we have a date, can we get locations?
            if maybe_timestamp:
                #Can we get a single recipient?  (This is, of course, not
                #guaranteed to be the owner.)  A multi-line To header means
                #multiple recipients, so it is rejected.
                sole_recipient = None
                delivered_to_headers = message.get_all("Delivered-To")
                to_headers = message.get_all("To")
                if delivered_to_headers and len(delivered_to_headers) == 1:
                    sole_recipient = delivered_to_headers[0]
                elif to_headers and len(to_headers) == 1 and len(to_headers[0].split("\n")) == 1:
                    sole_recipient = to_headers[0]

                all_ip_locations = geoproc_library.ips_to_locations(lookupcur, maybe_timestamp.datetime(), ips)
                dprint("Debug: Fetched these IP location records:\n\t%r" % all_ip_locations)
                for ip in ips:
                    outdict = {"fiwalk_id":fiwalk_id}
                    #TODO Use annodb to get TSK identifiers
                    outdict["message_index"] = messageno
                    outdict["ipv4"] = ip
                    outdict["received_path_index"] = pathindex
                    outdict["received_path_length"] = pathlength
                    outdict["received_header_text"] = pathline
                    outdict["database_queried"] = all_ip_locations is not None
                    outdict["believed_timestamp"] = str(maybe_timestamp)
                    outdict["sole_recipient_domain_is_webmail"] = geoproc_library.in_webmail_domain(sole_recipient)
                    if all_ip_locations is not None and ip in all_ip_locations:
                        rec = all_ip_locations[ip]
                        outdict["latitude"] = rec.get("latitude")
                        outdict["longitude"] = rec.get("longitude")
                        outdict["postalCode"] = rec.get("postalCode")
                        outdict["maxmind_ipv4_time"] = dfxml.dftime(rec.get("maxmind_ipv4_time")).iso8601()
                        if rec.get("country"):
                            outdict["country"] = rec["country"]
                        if rec.get("region"):
                            outdict["region"] = rec["region"]
                        if rec.get("city"):
                            outdict["city"] = rec["city"]
                    dprint("Debug: Checking for IP notes for %r." % ip)
                    if ip in ip_notes_histogram:
                        dprint("Debug: Formatting notes for %r." % ip)
                        #Summarize this IP's bulk_extractor notes as
                        #"<count> <note>; ..." in sorted-note order.
                        notedict = ip_notes_histogram[ip]
                        notelist = sorted(notedict.keys())
                        notes_to_format = []
                        for note in notelist:
                            notes_to_format.append("%d %r" % (notedict[note], note))
                        outdict["ipv4_be_notes"] = "; ".join(notes_to_format)
                        outdict["ipv4_be_has_cksum_or_socket"] = "sockaddr" in outdict["ipv4_be_notes"] or "cksum-ok" in outdict["ipv4_be_notes"]
                    dprint("Debug: Outdict just before inserting:\n\t%r" % outdict)
                    geoproc_library.insert_db(outcur, "email_files_votes", outdict)
    outconn.commit()
    dprint("Debug: Done.")