def _end_element(self, name):
    """
    Handle the closing tag of the XML element called `name`.

    Behavior by element name:
      * "msregistry", "hive": nothing to do.
      * "key", "node": pop the finished object off self.objectstack, store
        it in self.registry_object.object_index under its cellpath, then
        pass it to self.callback.
      * "mtime": wrap the accumulated character data in dfxml.dftime, assign
        it to the mtime of the object on top of the stack, and clear
        self.cdata.
      * "value": pop the finished object; if its data is still None, use the
        accumulated character data as its data; pass it to self.callback.
      * "string": append the accumulated character data to the strings list
        of the object on top of the stack, and clear self.cdata.
      * "byte_runs", "byte_run": nothing to do.

    Raises:
      ValueError: when a key's cellpath is already in the object index, when
        a "string" element closes under a parent whose strings attribute is
        None, or when `name` is not one of the element names above.
    """
    if name in ("msregistry", "hive"):
        pass
    elif name in ("key", "node"):
        finished_object = self.objectstack.pop()
        #Add finished object to object index
        object_index = self.registry_object.object_index
        if finished_object.cellpath in object_index:
            raise ValueError("regxml_reader_Objects._end_element: Same key path found more than once: " + finished_object.cellpath)
        object_index[finished_object.cellpath] = finished_object
        self.callback(finished_object)
    elif name == "mtime":
        self.objectstack[-1].mtime = dfxml.dftime(self.cdata)
        self.cdata = None
    elif name == "value":
        finished_object = self.objectstack.pop()
        #Identity test: only a genuinely unset (None) data attribute is
        #filled in, regardless of how the stored object implements __eq__.
        if finished_object.data is None:
            finished_object.data = self.cdata
        #NOTE(review): unlike the "mtime" and "string" branches, self.cdata
        #is not cleared here - confirm that is intentional.
        self.callback(finished_object)
    elif name == "string":
        value_object = self.objectstack[-1]
        if value_object.strings is None:
            raise ValueError("regxml_reader_Objects._end_element: parsing error, string element found, but parent's type can't support a string list.")
        value_object.strings.append(self.cdata)
        self.cdata = None
    elif name in ("byte_runs", "byte_run"):
        pass
    else:
        raise ValueError("regxml_reader_Objects._end_element: Don't know how to end element %s.\n" % name)
def xmlout_times(fn, x, fistat):
    """
    Emit one XML element per timestamp attribute present on `fistat`.

    The attributes st_mtime, st_atime, st_ctime and st_birthtime are checked
    in that order; each one that appears in dir(fistat) is written through
    x.xmlout() under the tag mtime, atime, ctime or crtime respectively.

    When the module-level args.iso_8601 is true, the text is
    str(dfxml.dftime(value)) and the attribute dictionary is empty;
    otherwise the text is str(value) and the attribute dictionary is
    {"format": "time_t"}.

    `fn` is accepted but not used.
    """
    tags_and_fields = (
        ("mtime", "st_mtime"),
        ("atime", "st_atime"),
        ("ctime", "st_ctime"),
        ("crtime", "st_birthtime"),
    )
    for tag, stat_field in tags_and_fields:
        if stat_field not in dir(fistat):
            #This stat result does not carry the field; nothing to emit.
            continue
        raw_time = getattr(fistat, stat_field)
        if args.iso_8601:
            #dfxml is only needed (and only imported) for ISO 8601 output.
            import dfxml
            x.xmlout(tag, str(dfxml.dftime(raw_time)), dict())
        else:
            x.xmlout(tag, str(raw_time), {"format": "time_t"})
def xmlout_times(fn,x,fistat):
    """
    Write the timestamps found on the stat-like object `fistat` to `x`.

    For each of (mtime, st_mtime), (atime, st_atime), (ctime, st_ctime) and
    (crtime, st_birthtime), in that order, a field listed by dir(fistat) is
    passed to x.xmlout(tag, text, attributes). The global args.iso_8601
    selects the rendering: dfxml.dftime text with no attributes when true,
    the plain str() of the value with format="time_t" when false.

    The `fn` parameter is not referenced by the body.
    """
    global args

    def _render(value):
        #Format timestamp data; returns (element text, attribute dict).
        if args.iso_8601:
            import dfxml
            return (str(dfxml.dftime(value)), {})
        return (str(value), {"format": "time_t"})

    element_tags = ("mtime", "atime", "ctime", "crtime")
    stat_names = ("st_mtime", "st_atime", "st_ctime", "st_birthtime")
    for (element_tag, stat_name) in zip(element_tags, stat_names):
        if stat_name in dir(fistat):
            (text_out, attrs_dict) = _render(getattr(fistat, stat_name))
            x.xmlout(element_tag, text_out, attrs_dict)
def machine_tag_timestamp(machine, tag):
    """
    Return the datetime() of the dfxml.dftime built from
    MACHINE_TIMES[machine][tag].

    A missing `machine` or `tag` key propagates whatever lookup error
    MACHINE_TIMES raises.
    """
    raw_time = MACHINE_TIMES[machine][tag]
    return dfxml.dftime(raw_time).datetime()
def main():
    """
    Turn bulk_extractor IPv4 hits into rows of the "ipv4s_votes" table.

    Steps, in order:
      1. Open the file system annotation database named by args.anno.
      2. Open a MySQL connection using the maxmind_* settings of
         geoproc_cfg.config and take a dictionary cursor from it.
      3. Create the SQLite file "ipv4s_votes.db" (relative path) and run
         SQL_CREATE_IPV4S_VOTES on it.
      4. Group the (forensic_path, ipv4, ipv4_notes) records yielded by
         geoproc_library.bulk_extractor_ips(args.be_dir) by forensic path.
      5. Look up locations for all distinct IPs in one call, then insert one
         row per hit and commit once at the end.

    Raises:
      Exception: if the MySQL connect call returns None.
    """
    global args

    #Connect to anno db if available
    annoconn, annocur = geoproc_library.connect_to_fs_anno_db(args.anno)

    #Connect to the reference (lookup) db
    settings = geoproc_cfg.config
    refconn = mysql.connector.Connect(
        host=settings.get("mysql", "maxmind_server"),
        user=settings.get("mysql", "maxmind_read_username"),
        password=geoproc_cfg.db_password("maxmind_read_password_file"),
        db=settings.get("mysql", "maxmind_schema"),
        use_unicode=True
    )
    if refconn is None:
        raise Exception("Error: Could not define lookup cursor.")
    refcur = refconn.cursor(cursor_class=geoproc_cfg.MySQLCursorDict)

    #Set up the output db
    outconn = sqlite3.connect("ipv4s_votes.db")
    outconn.isolation_level = "EXCLUSIVE"
    outconn.row_factory = sqlite3.Row
    outcur = outconn.cursor()
    outcur.execute(SQL_CREATE_IPV4S_VOTES)

    #Group hits by forensic path, and collect the distinct addresses.
    hits_by_path = collections.defaultdict(list)
    distinct_ips = set()
    for (forensic_path, ipv4, ipv4_notes) in geoproc_library.bulk_extractor_ips(args.be_dir):
        hits_by_path[forensic_path].append((ipv4, ipv4_notes))
        distinct_ips.add(ipv4)

    #Unfortunately, there isn't much to do for timestamps without file system or network time information.
    #TODO Add time interface
    placeholder_time = dfxml.dftime("2009-05-01T00:00:00Z")

    locations_by_ip = geoproc_library.ips_to_locations(refcur, None, distinct_ips)

    location_fields = (
        "maxmind_ipv4_time",
        "country",
        "region",
        "city",
        "postalCode",
        "latitude",
        "longitude",
    )

    for (forensic_path, hits) in hits_by_path.items():
        #Exactly two hits at one forensic path count as a pair.
        has_pair = len(hits) == 2
        for (ipv4, ipv4_notes) in hits:
            vote = {
                "believed_timestamp": placeholder_time.iso8601(),
                "forensic_path": forensic_path,
                "ipv4": ipv4,
                "ipv4_notes": ipv4_notes,
            }

            #cksum_ok stays unset when the notes mention neither marker.
            if "cksum-bad" in ipv4_notes:
                vote["cksum_ok"] = False
            elif "cksum-ok" in ipv4_notes:
                vote["cksum_ok"] = True

            vote["is_socket_address"] = "sockaddr" in ipv4_notes
            vote["pair_found"] = has_pair

            #src_or_dst stays unset when the notes mention neither marker.
            if "(src)" in ipv4_notes:
                vote["src_or_dst"] = "src"
            elif "dst" in ipv4_notes:
                vote["src_or_dst"] = "dst"

            owners = geoproc_library.forensic_path_to_anno_recs(annocur, forensic_path)
            if owners:
                if len(owners) > 1:
                    sys.stderr.write("Warning: Multiple files found to own forensic path %r. Only using first. This may cause strange results.\n" % forensic_path)
                if len(owners) > 0:
                    first_owner = owners[0]
                    for id_field in ("obj_id", "fs_obj_id", "fiwalk_id"):
                        vote[id_field] = first_owner.get(id_field)

            if ipv4 in locations_by_ip:
                location = locations_by_ip[ipv4]
                for field in location_fields:
                    vote[field] = location[field]

            geoproc_library.insert_db(outcur, "ipv4s_votes", vote)
    outconn.commit()
def dftime_from_filetime(ft):
    """
    Convert `ft` with filetime_to_timestamp() and wrap the result in a
    dfxml.dftime object, which is returned.
    """
    timestamp = filetime_to_timestamp(ft)
    return dfxml.dftime(timestamp)
def get_cookie_votes(outconn, lookupcur, annocur, cookie_fiwalk_id, cookie_contents):
    """
    Build the list of geographic "vote" dictionaries found in one cookie.

    Parameters:
      outconn: Accepted for interface compatibility; not used by the body.
      lookupcur: Cursor passed through to the geoproc_library location
        lookups.
      annocur: File system annotation cursor. When falsy, no file system
        identifiers or timestamps are added to the votes.
      cookie_fiwalk_id: Stored in every vote under "fiwalk_id" and used as
        the annotation query parameter.
      cookie_contents: Entire contents of the cookie (this can come from an
        HTTP header or a file).

    Returns:
      A list of dictionaries. Votes with record_type "msn" come from
      all_msn_matches() entries that carry both a latitude and a longitude;
      votes with record_type "ipv4" come from every address returned by
      geoproc_library.all_ipv4s(). A lookup that returns None is recorded as
      database_queried = False.
    """
    cookie_contents_lower = cookie_contents.lower()
    retlist = []
    basic_vote = {"fiwalk_id": cookie_fiwalk_id}

    #Fill in file system info from annodb
    if annocur:
        annocur.execute(
            " SELECT tsk_obj_id, tf.fs_obj_id, tf.mtime, tf.atime, tf.ctime, tf.crtime"
            " FROM fiwalk_id_to_tsk_obj_id, tskout.tsk_files AS tf"
            " WHERE fiwalk_id_to_tsk_obj_id.tsk_obj_id = tf.obj_id AND fiwalk_id = ? ; ",
            (cookie_fiwalk_id,)
        )
        annorows = list(annocur)
        #File system info is only trusted when exactly one file matches.
        if len(annorows) == 1:
            basic_vote["fs_obj_id"] = annorows[0]["fs_obj_id"]
            basic_vote["obj_id"] = annorows[0]["tsk_obj_id"]
            #Copy the row into a plain dict once, so .get() is available.
            annorow = {key: annorows[0][key] for key in annorows[0].keys()}
            #The first non-empty time, in this order, becomes the believed timestamp.
            for timefield in ("mtime", "atime", "ctime", "crtime"):
                if annorow.get(timefield):
                    basic_vote["selected_time_type"] = timefield
                    basic_vote["believed_timestamp"] = dfxml.dftime(annorow[timefield]).iso8601()
                    break

    #TODO use city_matches = match_cities(lookupcur, cookie_contents)
    #NOTE(review): city_matches is never read below, so this is kept as a comment; confirm it was not meant to be a live call.

    #Perform MSN matches
    for m in all_msn_matches(cookie_contents):
        if not (m.get("longitude") and m.get("latitude")):
            continue
        retdict = copy.deepcopy(basic_vote)
        retdict["record_type"] = "msn"
        retdict["latitude"] = float(m["latitude"])
        retdict["longitude"] = float(m["longitude"])
        if m.get("countrycode"):
            retdict["country"] = m["countrycode"]
        retdict["postalCode"] = m["zipcode"]
        locations_from_latlongs = geoproc_library.latlongs_to_networked_locations(lookupcur, retdict["latitude"], retdict["longitude"], 30)
        if locations_from_latlongs is None:
            sys.stderr.write("Warning: Couldn't look up latitude/longitude.\n")
            retdict["database_queried"] = False
        elif len(locations_from_latlongs) > 0:
            #Use closest location
            locrec = locations_from_latlongs[0]
            for locfield in ("country", "postalCode"):
                if not locrec.get(locfield):
                    continue
                if retdict.get(locfield):
                    agree_key = "cookie_latlong_and_maxmind_agree_on_" + locfield
                    retdict[agree_key] = locrec[locfield] == retdict[locfield]
                    if not retdict[agree_key]:
                        #Report the field that actually disagrees. Both values
                        #are known to exist here, so this cannot raise KeyError.
                        sys.stderr.write("Warning: Data anomaly: MSN cookie reports %s %r, lat/long seem to be in %r by MaxMind.\n" % (locfield, retdict[locfield], locrec[locfield]))
                else:
                    #The cookie did not supply this field; take the looked-up value.
                    retdict[locfield] = locrec[locfield]
            retdict["region"] = locrec["region"]
            retdict["city"] = locrec["city"]
            retdict["database_queried"] = True
        retlist.append(retdict)

    #Perform IPv4 text matches
    all_ips = geoproc_library.all_ipv4s(cookie_contents)
    believed_cookie_time = None #TODO Get actual times from database, loop through them
    #The lookup arguments are the same for every address, so query once
    #(and not at all when the cookie contains no addresses).
    all_ip_locations = None
    if all_ips:
        all_ip_locations = geoproc_library.ips_to_locations(lookupcur, believed_cookie_time, all_ips)
    for ipv4 in all_ips:
        retdict = copy.deepcopy(basic_vote)
        retdict["record_type"] = "ipv4"
        retdict["ipv4"] = ipv4
        retdict["database_queried"] = all_ip_locations is not None
        if all_ip_locations is not None and ipv4 in all_ip_locations:
            rec = all_ip_locations[ipv4]
            retdict["latitude"] = rec.get("latitude")
            retdict["longitude"] = rec.get("longitude")
            retdict["postalCode"] = rec.get("postalCode")
            retdict["maxmind_ipv4_time"] = dfxml.dftime(rec.get("maxmind_ipv4_time")).iso8601()
            #Also note whether each looked-up place name appears in the cookie text.
            for placefield in ("country", "region", "city"):
                if rec.get(placefield):
                    retdict[placefield] = rec[placefield]
                    retdict[placefield + "_found_in_text"] = rec[placefield].lower() in cookie_contents_lower
        retlist.append(retdict)
    return retlist
def main():
    """
    Record one vote row per IPv4 address found in e-mail "Received" headers.

    Steps, in order:
      1. Try to open the MySQL lookup database described by the maxmind_*
         settings of geoproc_cfg.config. Failure is not fatal: a warning is
         written and processing continues with lookupcur left as None.
      2. Open the annotation database named by args.annodb.
      3. Require "manifest.txt" inside args.emaildir.
      4. If args.bulk_extractor_output is set, tally the bulk_extractor
         notes seen for each IPv4 address.
      5. Create "email_files_votes.db" inside args.outdir (refusing to
         overwrite an existing file) and run SQL_CREATE_EMAIL_FILES_VOTES.
      6. For every message from emails_in_dir_manifest(), inspect each
         "Received" header; when a timestamp can be parsed from the header,
         insert one "email_files_votes" row per IPv4 address in that header.
      7. Commit once at the end.

    Raises:
      Exception: if manifest.txt is missing or the output database already
        exists.
    """
    global args

    #Set up lookup database connection
    cfg = geoproc_cfg.config
    lookupconn = None
    lookupcur = None
    try:
        import mysql.connector as mdb
        lookupconn = mdb.connect(
            host=cfg.get("mysql", "maxmind_server"),
            user=cfg.get("mysql", "maxmind_read_username"),
            password=geoproc_cfg.db_password("maxmind_read_password_file"),
            db=cfg.get("mysql", "maxmind_schema"),
            use_unicode=True
        )
        lookupcur = lookupconn.cursor(cursor_class=geoproc_cfg.MySQLCursorDict)
    except Exception:
        #Best effort: any ordinary failure (missing module, refused
        #connection, bad configuration) only disables lookups.
        #KeyboardInterrupt and SystemExit are deliberately not caught.
        sys.stderr.write("Warning: Could not connect to database. Proceeding without database support.\n")

    #Connect to annodb
    annoconn, annocur = geoproc_library.connect_to_fs_anno_db(args.annodb)

    #Verify input
    manifest_path = os.path.join(args.emaildir, "manifest.txt")
    if not os.path.isfile(manifest_path):
        raise Exception("Error: manifest.txt not found in input directory.")

    #Ingest BE ips, if available
    #Stash in (once-tested) histogram.
    #Dictionary key: ipv4 address
    #Dictionary value: (notes, tally) default dictionary.
    ip_notes_histogram = collections.defaultdict(lambda: collections.defaultdict(int))
    if args.bulk_extractor_output:
        for (forensic_path, ipv4, ipv4_notes) in geoproc_library.bulk_extractor_ips(args.bulk_extractor_output):
            ip_notes_histogram[ipv4][ipv4_notes] += 1
    dprint("Debug: Number of IPv4s with notes: %d." % len(ip_notes_histogram))

    #Set up output database
    outdbpath = os.path.join(args.outdir, "email_files_votes.db")
    if os.path.isfile(outdbpath):
        raise Exception("Error: Output database already exists. This script won't overwrite. Aborting.")
    outconn = sqlite3.connect(outdbpath)
    outconn.isolation_level = "EXCLUSIVE"
    outconn.row_factory = sqlite3.Row
    outcur = outconn.cursor()
    outcur.execute(SQL_CREATE_EMAIL_FILES_VOTES)

    for (fiwalk_id, messageno, message) in emails_in_dir_manifest(manifest_path):
        dprint("Debug: Analyzing a record from fiwalk_id %r." % fiwalk_id)
        received_recs = message.get_all("Received")
        if not received_recs:
            continue
        pathlength = len(received_recs)
        for (pathindex, pathline) in enumerate(received_recs):
            #TODO Just getting all the IPs for now; filter later
            ips = geoproc_library.all_ipv4s(pathline)
            dprint("Debug: Found this many IP's: %d.\n\t%r" % (len(ips), ips))

            #Can we get a date?
            maybe_timestamp = None
            maybe_timestamp_match = dfxml.rx_rfc822datetime.search(pathline)
            if maybe_timestamp_match:
                #group(0) is the full text matched by the pattern.
                thedatestring = maybe_timestamp_match.group(0)
                try:
                    maybe_timestamp = dfxml.dftime(thedatestring)
                except Exception:
                    #Don't stop here: report the unparseable date and move on
                    #without a timestamp.
                    sys.stderr.write("Warning: An error occured trying to parse time input.\nInput:%r\nStack trace:\n" % thedatestring)
                    sys.stderr.write(traceback.format_exc())
                    sys.stderr.write("\n")
            dprint("Debug: Believed timestamp: %r." % maybe_timestamp)

            #Now that we have a date, can we get locations?
            if not maybe_timestamp:
                continue

            #Can we get a single recipient? (This is, of course, not guaranteed to be the owner.)
            sole_recipient = None
            delivered_to_headers = message.get_all("Delivered-To")
            to_headers = message.get_all("To")
            if delivered_to_headers and len(delivered_to_headers) == 1:
                sole_recipient = delivered_to_headers[0]
            elif to_headers and len(to_headers) == 1 and len(to_headers[0].split("\n")) == 1:
                sole_recipient = to_headers[0]

            all_ip_locations = geoproc_library.ips_to_locations(lookupcur, maybe_timestamp.datetime(), ips)
            dprint("Debug: Fetched these IP location records:\n\t%r" % all_ip_locations)
            for ip in ips:
                outdict = {"fiwalk_id":fiwalk_id}
                #TODO Use annodb to get TSK identifiers
                outdict["message_index"] = messageno
                outdict["ipv4"] = ip
                outdict["received_path_index"] = pathindex
                outdict["received_path_length"] = pathlength
                outdict["received_header_text"] = pathline
                outdict["database_queried"] = all_ip_locations is not None
                outdict["believed_timestamp"] = str(maybe_timestamp)
                outdict["sole_recipient_domain_is_webmail"] = geoproc_library.in_webmail_domain(sole_recipient)
                if all_ip_locations is not None and ip in all_ip_locations:
                    rec = all_ip_locations[ip]
                    outdict["latitude"] = rec.get("latitude")
                    outdict["longitude"] = rec.get("longitude")
                    outdict["postalCode"] = rec.get("postalCode")
                    outdict["maxmind_ipv4_time"] = dfxml.dftime(rec.get("maxmind_ipv4_time")).iso8601()
                    if rec.get("country"):
                        outdict["country"] = rec["country"]
                    if rec.get("region"):
                        outdict["region"] = rec["region"]
                    if rec.get("city"):
                        outdict["city"] = rec["city"]
                dprint("Debug: Checking for IP notes for %r." % ip)
                #Membership test on a defaultdict does not create an entry.
                if ip in ip_notes_histogram:
                    dprint("Debug: Formatting notes for %r." % ip)
                    notedict = ip_notes_histogram[ip]
                    #One "<tally> <note>" item per distinct note, sorted by note.
                    notes_to_format = ["%d %r" % (notedict[note], note) for note in sorted(notedict.keys())]
                    outdict["ipv4_be_notes"] = "; ".join(notes_to_format)
                    outdict["ipv4_be_has_cksum_or_socket"] = "sockaddr" in outdict["ipv4_be_notes"] or "cksum-ok" in outdict["ipv4_be_notes"]
                dprint("Debug: Outdict just before inserting:\n\t%r" % outdict)
                geoproc_library.insert_db(outcur, "email_files_votes", outdict)
    outconn.commit()
    dprint("Debug: Done.")