def get_cookie_votes(outconn, lookupcur, annocur, cookie_fiwalk_id, cookie_contents):
    """
    Build the list of geographic votes that can be derived from one cookie.

    Input:
      outconn: Output database connection.  Not used by this function; kept
        so existing callers do not need to change.
      lookupcur: Cursor passed through to the geoproc_library location
        lookups.  Those helpers return None when the lookup could not be
        performed, which is recorded as database_queried = False.
      annocur: Cursor on the file system annotation database, or a false
        value to skip the file system lookup.
      cookie_fiwalk_id: fiwalk identifier of the file the cookie came from.
      cookie_contents: Entire contents of the cookie (this can come from an
        HTTP header or a file).
    Output:
      List of dictionaries: one per MSN location match that carries both a
      latitude and a longitude, followed by one per IPv4 address found in
      the cookie text.
    """
    cookie_contents_lower = cookie_contents.lower()
    retlist = []
    basic_vote = {"fiwalk_id": cookie_fiwalk_id}

    #Fill in file system info from annodb
    if annocur:
        annocur.execute("""
          SELECT
            tsk_obj_id,
            tf.fs_obj_id,
            tf.mtime,
            tf.atime,
            tf.ctime,
            tf.crtime
          FROM
            fiwalk_id_to_tsk_obj_id,
            tskout.tsk_files AS tf
          WHERE
            fiwalk_id_to_tsk_obj_id.tsk_obj_id = tf.obj_id AND
            fiwalk_id = ?
          ;
        """, (cookie_fiwalk_id,))
        annorows = list(annocur)
        #Only trust the annotation when the fiwalk id maps to exactly one file.
        if len(annorows) == 1:
            #Copy the row into a plain dict once; the loop below relies on dict.get().
            annorow = {key: annorows[0][key] for key in annorows[0].keys()}
            basic_vote["fs_obj_id"] = annorow["fs_obj_id"]
            basic_vote["obj_id"] = annorow["tsk_obj_id"]
            #The first populated time field, in this order, becomes the believed timestamp.
            for timefield in ["mtime", "atime", "ctime", "crtime"]:
                if annorow.get(timefield):
                    basic_vote["selected_time_type"] = timefield
                    basic_vote["believed_timestamp"] = dfxml.dftime(annorow[timefield]).iso8601()
                    break

    #TODO use city_matches = match_cities(lookupcur, cookie_contents)

    #Perform MSN matches
    for m in all_msn_matches(cookie_contents):
        #A vote without both coordinates is not usable.
        if not (m.get("longitude") and m.get("latitude")):
            continue
        retdict = copy.deepcopy(basic_vote)
        retdict["record_type"] = "msn"
        retdict["latitude"] = float(m["latitude"])
        retdict["longitude"] = float(m["longitude"])
        if m.get("countrycode"):
            retdict["country"] = m["countrycode"]
        retdict["postalCode"] = m["zipcode"]

        locations_from_latlongs = geoproc_library.latlongs_to_networked_locations(lookupcur, retdict["latitude"], retdict["longitude"], 30)
        if locations_from_latlongs is None:
            sys.stderr.write("Warning: Couldn't look up latitude/longitude.\n")
        #Same meaning as in the IPv4 votes below: True whenever the lookup ran,
        #even if it found no nearby location.
        retdict["database_queried"] = locations_from_latlongs is not None

        if locations_from_latlongs:
            #Use closest location
            locrec = locations_from_latlongs[0]
            for locfield in ["country", "postalCode"]:
                if not locrec.get(locfield):
                    continue
                if retdict.get(locfield):
                    agree_key = "cookie_latlong_and_maxmind_agree_on_" + locfield
                    retdict[agree_key] = locrec[locfield] == retdict[locfield]
                    if not retdict[agree_key]:
                        #Report the field that actually disagrees, not always the country.
                        sys.stderr.write("Warning: Data anomaly: MSN cookie reports %s %r, lat/long seem to be in %r by MaxMind.\n" % (locfield, retdict[locfield], locrec[locfield]))
                else:
                    #The cookie had no value for this field; take the looked-up one.
                    retdict[locfield] = locrec[locfield]
            retdict["region"] = locrec["region"]
            retdict["city"] = locrec["city"]
        retlist.append(retdict)

    #Perform IPv4 text matches
    all_ips = geoproc_library.all_ipv4s(cookie_contents)
    believed_cookie_time = None #TODO Get actual times from database, loop through them
    #The lookup takes every address at once and does not depend on the loop
    #variable, so run it a single time (and not at all when there are no IPs).
    all_ip_locations = None
    if all_ips:
        all_ip_locations = geoproc_library.ips_to_locations(lookupcur, believed_cookie_time, all_ips)
    for ipv4 in all_ips:
        retdict = copy.deepcopy(basic_vote)
        retdict["record_type"] = "ipv4"
        retdict["ipv4"] = ipv4
        retdict["database_queried"] = all_ip_locations is not None
        if all_ip_locations is not None and ipv4 in all_ip_locations:
            rec = all_ip_locations[ipv4]
            retdict["latitude"] = rec.get("latitude")
            retdict["longitude"] = rec.get("longitude")
            retdict["postalCode"] = rec.get("postalCode")
            retdict["maxmind_ipv4_time"] = dfxml.dftime(rec.get("maxmind_ipv4_time")).iso8601()
            #For each named place, also note whether its name appears
            #(case-insensitively) in the cookie text.
            for placefield in ["country", "region", "city"]:
                if rec.get(placefield):
                    retdict[placefield] = rec[placefield]
                    retdict[placefield + "_found_in_text"] = rec[placefield].lower() in cookie_contents_lower
        retlist.append(retdict)
    return retlist
def main():
    """
    Scan the emails listed in a directory manifest and record one location
    vote per IPv4 address found in each timestamped "Received" header.

    Reads these attributes of the module-level args:
      annodb: passed to geoproc_library.connect_to_fs_anno_db.
      emaildir: directory that must contain manifest.txt.
      bulk_extractor_output: optional; when set, per-IP notes are tallied
        from geoproc_library.bulk_extractor_ips and attached to the votes.
      outdir: directory in which email_files_votes.db is created.

    Raises Exception if manifest.txt is missing, or if the output database
    already exists (this script does not overwrite).
    """
    global args

    #Set up lookup database connection
    cfg = geoproc_cfg.config
    lookupconn = None
    lookupcur = None
    try:
        import mysql.connector as mdb
        lookupconn = mdb.connect(
          host=cfg.get("mysql", "maxmind_server"),
          user=cfg.get("mysql", "maxmind_read_username"),
          password=geoproc_cfg.db_password("maxmind_read_password_file"),
          db=cfg.get("mysql", "maxmind_schema"),
          use_unicode=True
        )
        lookupcur = lookupconn.cursor(cursor_class=geoproc_cfg.MySQLCursorDict)
    except Exception:
        #Best effort: a missing driver or an unreachable server only disables
        #lookups.  KeyboardInterrupt and SystemExit are allowed to propagate.
        sys.stderr.write("Warning: Could not connect to database. Proceeding without database support.\n")

    #Connect to annodb
    annoconn, annocur = geoproc_library.connect_to_fs_anno_db(args.annodb)

    #Verify input
    manifest_path = os.path.join(args.emaildir, "manifest.txt")
    if not os.path.isfile(manifest_path):
        raise Exception("Error: manifest.txt not found in input directory.")

    #Ingest BE ips, if available
    #Stash in (once-tested) histogram.
    #Dictionary key: ipv4 address
    #Dictionary value: (notes, tally) default dictionary.
    ip_notes_histogram = collections.defaultdict(lambda: collections.defaultdict(lambda: 0))
    if args.bulk_extractor_output:
        for (forensic_path, ipv4, ipv4_notes) in geoproc_library.bulk_extractor_ips(args.bulk_extractor_output):
            ip_notes_histogram[ipv4][ipv4_notes] += 1
    dprint("Debug: Number of IPv4s with notes: %d." % len(ip_notes_histogram))

    #Set up output database
    outdbpath = os.path.join(args.outdir, "email_files_votes.db")
    if os.path.isfile(outdbpath):
        raise Exception("Error: Output database already exists. This script won't overwrite. Aborting.")
    outconn = sqlite3.connect(outdbpath)
    outconn.isolation_level = "EXCLUSIVE"
    outconn.row_factory = sqlite3.Row
    outcur = outconn.cursor()
    outcur.execute(SQL_CREATE_EMAIL_FILES_VOTES)

    for (fiwalk_id, messageno, message) in emails_in_dir_manifest(manifest_path):
        dprint("Debug: Analyzing a record from fiwalk_id %r." % fiwalk_id)
        received_recs = message.get_all("Received")
        if not received_recs:
            continue
        pathlength = len(received_recs)

        #Can we get a single recipient?  (This is, of course, not guaranteed to be the owner.)
        #This depends only on the message, so work it out once per message
        #rather than once per Received line.
        sole_recipient = None
        delivered_to_headers = message.get_all("Delivered-To")
        to_headers = message.get_all("To")
        if delivered_to_headers and len(delivered_to_headers) == 1:
            sole_recipient = delivered_to_headers[0]
        elif to_headers and len(to_headers) == 1 and len(to_headers[0].split("\n")) == 1:
            sole_recipient = to_headers[0]

        for (pathindex, pathline) in enumerate(received_recs):
            #TODO Just getting all the IPs for now; filter later
            ips = geoproc_library.all_ipv4s(pathline)
            dprint("Debug: Found this many IP's: %d.\n\t%r" % (len(ips), ips))

            #Can we get a date?
            maybe_timestamp = None
            maybe_timestamp_match = dfxml.rx_rfc822datetime.search(pathline)
            if maybe_timestamp_match:
                thestring = maybe_timestamp_match.string
                thespan = maybe_timestamp_match.span()
                thedatestring = thestring[thespan[0]:thespan[1]]
                try:
                    maybe_timestamp = dfxml.dftime(thedatestring)
                except Exception:
                    #Don't stop here: report the unparseable date and carry on
                    #with this header treated as having no timestamp.
                    sys.stderr.write("Warning: An error occured trying to parse time input.\nInput:%r\nStack trace:\n" % thedatestring)
                    sys.stderr.write(traceback.format_exc())
                    sys.stderr.write("\n")
            dprint("Debug: Believed timestamp: %r." % maybe_timestamp)

            #Now that we have a date, can we get locations?
            if not maybe_timestamp:
                continue

            all_ip_locations = geoproc_library.ips_to_locations(lookupcur, maybe_timestamp.datetime(), ips)
            dprint("Debug: Fetched these IP location records:\n\t%r" % all_ip_locations)
            for ip in ips:
                outdict = {"fiwalk_id":fiwalk_id}
                #TODO Use annodb to get TSK identifiers
                outdict["message_index"] = messageno
                outdict["ipv4"] = ip
                outdict["received_path_index"] = pathindex
                outdict["received_path_length"] = pathlength
                outdict["received_header_text"] = pathline
                outdict["database_queried"] = all_ip_locations is not None
                outdict["believed_timestamp"] = str(maybe_timestamp)
                outdict["sole_recipient_domain_is_webmail"] = geoproc_library.in_webmail_domain(sole_recipient)
                if all_ip_locations is not None and ip in all_ip_locations:
                    rec = all_ip_locations[ip]
                    outdict["latitude"] = rec.get("latitude")
                    outdict["longitude"] = rec.get("longitude")
                    outdict["postalCode"] = rec.get("postalCode")
                    outdict["maxmind_ipv4_time"] = dfxml.dftime(rec.get("maxmind_ipv4_time")).iso8601()
                    if rec.get("country"):
                        outdict["country"] = rec["country"]
                    if rec.get("region"):
                        outdict["region"] = rec["region"]
                    if rec.get("city"):
                        outdict["city"] = rec["city"]

                dprint("Debug: Checking for IP notes for %r." % ip)
                #Membership test does not create an entry in the defaultdict.
                if ip in ip_notes_histogram:
                    dprint("Debug: Formatting notes for %r." % ip)
                    notedict = ip_notes_histogram[ip]
                    #One "<tally> <note>" item per distinct note, in sorted note order.
                    notes_to_format = ["%d %r" % (notedict[note], note) for note in sorted(notedict.keys())]
                    outdict["ipv4_be_notes"] = "; ".join(notes_to_format)
                    outdict["ipv4_be_has_cksum_or_socket"] = "sockaddr" in outdict["ipv4_be_notes"] or "cksum-ok" in outdict["ipv4_be_notes"]

                dprint("Debug: Outdict just before inserting:\n\t%r" % outdict)
                geoproc_library.insert_db(outcur, "email_files_votes", outdict)

    #All inserts are committed together at the end, then the handle is released.
    outconn.commit()
    outconn.close()
    dprint("Debug: Done.")