Beispiel #1
0
def main():
    """Write one row per bulk_extractor IPv4 hit into ipv4s_votes.db.

    Reads the module-level ``args`` (``args.anno`` and ``args.be_dir``),
    groups the hits yielded by ``geoproc_library.bulk_extractor_ips`` by
    forensic path, looks the addresses up through the MySQL schema named in
    ``geoproc_cfg`` and inserts the annotated rows into the ``ipv4s_votes``
    table of a SQLite file created in the current working directory.

    Raises:
        Exception: if the lookup connection object comes back as None.
    """
    global args
    #Connect to anno db if available
    annoconn, annocur = geoproc_library.connect_to_fs_anno_db(args.anno)

    refconn = None
    outconn = None
    try:
        #Connect to db
        cfg = geoproc_cfg.config
        refconn = mysql.connector.Connect(
          host=cfg.get("mysql", "maxmind_server"),
          user=cfg.get("mysql", "maxmind_read_username"),
          password=geoproc_cfg.db_password("maxmind_read_password_file"),
          db=cfg.get("mysql", "maxmind_schema"),
          use_unicode=True
        )
        if refconn is None:
            raise Exception("Error: Could not define lookup cursor.")
        refcur = refconn.cursor(cursor_class=geoproc_cfg.MySQLCursorDict)

        outconn = sqlite3.connect("ipv4s_votes.db")
        outconn.isolation_level = "EXCLUSIVE"
        outconn.row_factory = sqlite3.Row
        outcur = outconn.cursor()

        outcur.execute(SQL_CREATE_IPV4S_VOTES)

        #Group hits by forensic path so paired addresses can be detected,
        #and collect the distinct addresses for one bulk location lookup.
        pairing_dict = collections.defaultdict(list)
        ip_set = set()
        for (forensic_path, ipv4, ipv4_notes) in geoproc_library.bulk_extractor_ips(args.be_dir):
            pairing_dict[forensic_path].append((ipv4, ipv4_notes))
            ip_set.add(ipv4)

        #Unfortunately, there isn't much to do for timestamps without file system or network time information. #TODO Add time interface
        dummy_timestamp = dfxml.dftime("2009-05-01T00:00:00Z").iso8601()

        ips_to_locs = geoproc_library.ips_to_locations(refcur, None, ip_set)

        location_keys = (
          "maxmind_ipv4_time",
          "country",
          "region",
          "city",
          "postalCode",
          "latitude",
          "longitude"
        )

        for (forensic_path, entries_at_path) in pairing_dict.items():
            #Determine if we have a pair
            pair_found = len(entries_at_path) == 2

            #The owning-file lookup depends only on the forensic path, so it
            #is done (and any warning issued) once per path, not per entry.
            annorecs = geoproc_library.forensic_path_to_anno_recs(annocur, forensic_path)
            if annorecs and len(annorecs) > 1:
                sys.stderr.write("Warning: Multiple files found to own forensic path %r. Only using first.  This may cause strange results.\n" % forensic_path)
            annorec = annorecs[0] if annorecs else None

            for (ipv4, ipv4_notes) in entries_at_path:
                outdict = dict()
                outdict["believed_timestamp"] = dummy_timestamp
                outdict["forensic_path"] = forensic_path
                outdict["ipv4"] = ipv4
                outdict["ipv4_notes"] = ipv4_notes
                if "cksum-bad" in ipv4_notes:
                    outdict["cksum_ok"] = False
                elif "cksum-ok" in ipv4_notes:
                    outdict["cksum_ok"] = True
                #None, otherwise
                outdict["is_socket_address"] = "sockaddr" in ipv4_notes
                outdict["pair_found"] = pair_found
                if "(src)" in ipv4_notes:
                    outdict["src_or_dst"] = "src"
                elif "dst" in ipv4_notes:
                    outdict["src_or_dst"] = "dst"
                #None, otherwise
                if annorec is not None:
                    outdict["obj_id"] = annorec.get("obj_id")
                    outdict["fs_obj_id"] = annorec.get("fs_obj_id")
                    outdict["fiwalk_id"] = annorec.get("fiwalk_id")

                #A None lookup result means no location data is available;
                #the row is still recorded, just without location columns.
                if ips_to_locs is not None and ipv4 in ips_to_locs:
                    location = ips_to_locs[ipv4]
                    for key in location_keys:
                        outdict[key] = location[key]

                geoproc_library.insert_db(outcur, "ipv4s_votes", outdict)
        outconn.commit()
    finally:
        #Release every connection that was opened, on success and on error.
        #Closing the SQLite connection without a commit leaves uncommitted
        #rows unwritten, which is what we want on the error path.
        for conn in (outconn, refconn, annoconn):
            if conn is not None:
                conn.close()
def _received_timestamp(pathline):
    """Return a dfxml.dftime for the first RFC 822 date in pathline, or None."""
    date_match = dfxml.rx_rfc822datetime.search(pathline)
    if not date_match:
        return None
    thedatestring = date_match.group(0)
    try:
        return dfxml.dftime(thedatestring)
    except Exception:
        sys.stderr.write("Warning: An error occured trying to parse time input.\nInput:%r\nStack trace:\n" % thedatestring)
        sys.stderr.write(traceback.format_exc())
        sys.stderr.write("\n")
        #Don't stop here.
        return None


def _sole_recipient(message):
    """Return the message's only Delivered-To (else only single-line To) value, or None."""
    delivered_to_headers = message.get_all("Delivered-To")
    if delivered_to_headers and len(delivered_to_headers) == 1:
        return delivered_to_headers[0]
    to_headers = message.get_all("To")
    if to_headers and len(to_headers) == 1 and len(to_headers[0].split("\n")) == 1:
        return to_headers[0]
    return None


def _format_be_notes(notedict):
    """Render a {note: tally} mapping as "<tally> <note repr>" items, sorted by note."""
    return "; ".join("%d %r" % (notedict[note], note) for note in sorted(notedict))


def main():
    """Write one row per (Received header, IPv4) pair into email_files_votes.db.

    Reads the module-level ``args`` (``args.annodb``, ``args.emaildir``,
    ``args.bulk_extractor_output``, ``args.outdir``).  For every message listed
    in ``manifest.txt`` each Received header is scanned for IPv4 addresses and
    an RFC 822 date; headers without a parseable date are skipped.  When the
    lookup database is reachable, location fields are added to the row.

    Raises:
        Exception: if manifest.txt is missing from the input directory, or
            if the output database file already exists.
    """
    global args

    #Set up lookup database connection
    cfg = geoproc_cfg.config
    lookupconn = None
    lookupcur = None
    try:
        import mysql.connector as mdb
        lookupconn = mdb.connect(
          host=cfg.get("mysql", "maxmind_server"),
          user=cfg.get("mysql", "maxmind_read_username"),
          password=geoproc_cfg.db_password("maxmind_read_password_file"),
          db=cfg.get("mysql", "maxmind_schema"),
          use_unicode=True
        )
        lookupcur = lookupconn.cursor(cursor_class=geoproc_cfg.MySQLCursorDict)
    except Exception:
        #Best effort: a missing driver or unreachable server only disables
        #location lookups.  Ctrl-C and SystemExit still propagate.
        sys.stderr.write("Warning: Could not connect to database. Proceeding without database support.\n")

    annoconn = None
    outconn = None
    try:
        #Connect to annodb
        #The cursor is not used yet; see the TSK identifiers TODO below.
        annoconn, _annocur = geoproc_library.connect_to_fs_anno_db(args.annodb)

        #Verify input
        manifest_path = os.path.join(args.emaildir, "manifest.txt")
        if not os.path.isfile(manifest_path):
            raise Exception("Error: manifest.txt not found in input directory.")

        #Ingest BE ips, if available
        #Stash in (once-tested) histogram.
        #Dictionary key: ipv4 address
        #Dictionary value: (notes, tally) default dictionary.
        ip_notes_histogram = collections.defaultdict(lambda: collections.defaultdict(int))
        if args.bulk_extractor_output:
            for (forensic_path, ipv4, ipv4_notes) in geoproc_library.bulk_extractor_ips(args.bulk_extractor_output):
                ip_notes_histogram[ipv4][ipv4_notes] += 1
        dprint("Debug: Number of IPv4s with notes: %d." % len(ip_notes_histogram))

        #Set up output database
        outdbpath = os.path.join(args.outdir, "email_files_votes.db")
        if os.path.isfile(outdbpath):
            raise Exception("Error: Output database already exists. This script won't overwrite. Aborting.")
        outconn = sqlite3.connect(outdbpath)
        outconn.isolation_level = "EXCLUSIVE"
        outconn.row_factory = sqlite3.Row
        outcur = outconn.cursor()
        outcur.execute(SQL_CREATE_EMAIL_FILES_VOTES)

        for (fiwalk_id, messageno, message) in emails_in_dir_manifest(manifest_path):
            dprint("Debug: Analyzing a record from fiwalk_id %r." % fiwalk_id)
            received_recs = message.get_all("Received")
            if not received_recs:
                continue
            pathlength = len(received_recs)

            #Can we get a single recipient?  (This is, of course, not guaranteed to be the owner.)
            #The recipient headers belong to the message, so this is worked
            #out once here rather than once per Received line.
            sole_recipient = _sole_recipient(message)

            for (pathindex, pathline) in enumerate(received_recs):
                #TODO Just getting all the IPs for now; filter later
                ips = geoproc_library.all_ipv4s(pathline)
                dprint("Debug: Found this many IP's: %d.\n\t%r" % (len(ips), ips))

                #Can we get a date?
                maybe_timestamp = _received_timestamp(pathline)
                dprint("Debug: Believed timestamp: %r." % maybe_timestamp)

                #Without a date there is nothing to record for this header.
                if not maybe_timestamp:
                    continue

                #Now that we have a date, can we get locations?
                all_ip_locations = geoproc_library.ips_to_locations(lookupcur, maybe_timestamp.datetime(), ips)
                dprint("Debug: Fetched these IP location records:\n\t%r" % all_ip_locations)
                for ip in ips:
                    outdict = {"fiwalk_id":fiwalk_id}
                    #TODO Use annodb to get TSK identifiers
                    outdict["message_index"] = messageno
                    outdict["ipv4"] = ip
                    outdict["received_path_index"] = pathindex
                    outdict["received_path_length"] = pathlength
                    outdict["received_header_text"] = pathline
                    outdict["database_queried"] = all_ip_locations is not None
                    outdict["believed_timestamp"] = str(maybe_timestamp)
                    outdict["sole_recipient_domain_is_webmail"] = geoproc_library.in_webmail_domain(sole_recipient)
                    if all_ip_locations is not None and ip in all_ip_locations:
                        rec = all_ip_locations[ip]
                        outdict["latitude"] = rec.get("latitude")
                        outdict["longitude"] = rec.get("longitude")
                        outdict["postalCode"] = rec.get("postalCode")
                        outdict["maxmind_ipv4_time"] = dfxml.dftime(rec.get("maxmind_ipv4_time")).iso8601()
                        #Name fields are only recorded when non-empty.
                        for key in ("country", "region", "city"):
                            if rec.get(key):
                                outdict[key] = rec[key]
                        dprint("Debug: Checking for IP notes for %r." % ip)
                        if ip in ip_notes_histogram:
                            dprint("Debug: Formatting notes for %r." % ip)
                            be_notes = _format_be_notes(ip_notes_histogram[ip])
                            outdict["ipv4_be_notes"] = be_notes
                            outdict["ipv4_be_has_cksum_or_socket"] = "sockaddr" in be_notes or "cksum-ok" in be_notes
                        dprint("Debug: Outdict just before inserting:\n\t%r" % outdict)
                    geoproc_library.insert_db(outcur, "email_files_votes", outdict)
        outconn.commit()
        dprint("Debug: Done.")
    finally:
        #Release every connection that was opened, on success and on error.
        for conn in (outconn, annoconn, lookupconn):
            if conn is not None:
                conn.close()