コード例 #1
0
ファイル: summarize.py プロジェクト: ajnelson/geoproc
def main():
    """Print the HTML summary report for a GeoProc results tree to stdout.

    Reads the module-global ``args`` (parsed command-line arguments) for
    the report title (``args.title``) and the results root directory
    (``args.results``).  ``HTML_DOC_HEAD`` and ``HTML_DOC_FOOT`` are
    module-level ``%``-style templates.
    """
    # Note: the original declared `global args`, `global HTML_DOC_HEAD` and
    # `global HTML_DOC_FOOT`, but `global` is only needed for assignment;
    # these names are only read here, so the declarations were no-ops.

    # Template substitutions for the document header.
    subs_dict = {"report_title": args.title or HTML_DEFAULT_TITLE}

    # Start output.
    print(HTML_DOC_HEAD % subs_dict)

    # Make list of directories to summarize.
    geoproc_results_dirs = geoproc_library.get_results_dirs(args.results)

    # Build list of statuses.
    stats = aggregate_statuses(geoproc_results_dirs)

    # Body sections.
    do_toc()
    do_processing_stats(stats)
    do_input_listing(geoproc_results_dirs)

    # Finish output.
    do_timestamp()
    print(HTML_DOC_FOOT)
コード例 #2
0
ファイル: aggregate.py プロジェクト: ajnelson/geoproc
def main():
    """Aggregate per-image vote databases into one SQLite output database.

    Reads the module-global ``args`` for ``output_db`` (destination path),
    ``input_root`` (GeoProc results root), ``ground_truth`` (optional
    SQLite ground-truth database) and ``zap`` (overwrite an existing
    output database).

    Raises:
        Exception: if the output database already exists and --zap was
            not given.
    """
    # (`global args` dropped: `global` is only required for assignment,
    # and args is only read in this function.)

    # Refuse to clobber an existing output database unless --zap was given.
    if os.path.isfile(args.output_db):
        if args.zap:
            os.remove(args.output_db)
        else:
            raise Exception("Output database already exists; aborting.")

    # Connect to location database to get names (necessary for wildcard
    # matching, like if we just found a city).
    config = geoproc_cfg.config
    lookupconn = mdb.Connect(
      host=config.get("mysql", "maxmind_server"),
      user=config.get("mysql", "maxmind_read_username"),
      password=geoproc_cfg.db_password("maxmind_read_password_file"),
      db=config.get("mysql", "maxmind_schema"),
      use_unicode=True
    )

    # Pre-declare so the finally block below can close whatever was
    # actually opened (fix: the original leaked all three connections
    # on every path, including the exception path).
    gtconn = None
    outconn = None
    try:
        lookupcur = lookupconn.cursor(cursor_class=geoproc_cfg.MySQLCursorDict)

        # Maybe connect to ground truth.
        gtcur = None
        if args.ground_truth:
            gtconn = sqlite3.connect(args.ground_truth)
            gtconn.row_factory = sqlite3.Row
            # Don't lock database.
            gtcur = gtconn.cursor()

        results_dir_list = geoproc_library.get_results_dirs(args.input_root)
        dprint("Aggregating %d directories." % len(results_dir_list))

        # Connect to output database.
        outconn = sqlite3.connect(args.output_db)
        outconn.isolation_level = "EXCLUSIVE"
        outconn.row_factory = sqlite3.Row
        outcur = outconn.cursor()

        def add_columns(outcur, table_name):
            """Add the aggregation columns to one votes table and create
            its *_weighted companion table.

            table_name is one of the hard-coded table names below, never
            user input, so %-interpolating it into DDL is safe here.
            """
            # Simple aggregate table: Just gets column for image_id.
            outcur.execute("ALTER TABLE %s ADD COLUMN image_id TEXT;" % table_name)

            # Weighted aggregate table: Gets other columns to determine vote accuracy.
            outcur.execute("CREATE TABLE %s_weighted AS SELECT * FROM %s;" % (table_name, table_name))
            outcur.execute("ALTER TABLE %s_weighted ADD COLUMN number_possible_locations NUMBER" % table_name)
            for bcol in ["country", "region", "city", "location"]:
                outcur.execute("ALTER TABLE %s_weighted ADD COLUMN correct_%s NUMBER;" % (table_name, bcol))

        outcur.execute(analyze_cookie_files.SQL_CREATE_COOKIE_FILES_VOTES)
        add_columns(outcur, "cookie_files_votes")
        outcur.execute(analyze_email_files.SQL_CREATE_EMAIL_FILES_VOTES)
        add_columns(outcur, "email_files_votes")
        outcur.execute(analyze_exif_headers.SQL_CREATE_EXIF_HEADERS_VOTES)
        add_columns(outcur, "exif_headers_votes")
        outcur.execute(analyze_ipv4s.SQL_CREATE_IPV4S_VOTES)
        add_columns(outcur, "ipv4s_votes")

        for results_dir in results_dir_list:
            # BaseException (not bare `except:`) so the debug note is still
            # printed for KeyboardInterrupt etc.; the exception is always
            # re-raised, matching the original behavior.
            try:
                ingest_table(outcur, lookupcur, gtcur, results_dir, "analyze_cookie_files.sh", "cookie_files_votes.db", "cookie_files_votes")
                ingest_table(outcur, lookupcur, gtcur, results_dir, "analyze_email_files.sh", "email_files_votes.db", "email_files_votes")
                ingest_table(outcur, lookupcur, gtcur, results_dir, "analyze_exif_headers.sh", "exif_headers_votes.db", "exif_headers_votes")
                ingest_table(outcur, lookupcur, gtcur, results_dir, "analyze_ipv4s.sh", "ipv4s_votes.db", "ipv4s_votes")
            except BaseException:
                dprint("Debug: Error occurred on results_dir %r." % results_dir)
                raise
        outconn.commit()
    finally:
        # Close whatever was opened; commit (above) has already run on
        # the success path, so closing here never discards work.
        if outconn is not None:
            outconn.close()
        if gtconn is not None:
            gtconn.close()
        lookupconn.close()
コード例 #3
0
ファイル: make_kml.py プロジェクト: ajnelson/geoproc
    parser.add_argument("--process_dir", help="Use this GeoProc output directory to find all SQLite files, including votes and file system annotations.  If another *_votes argument is passed, that argument will override.")
    parser.add_argument("--cookie_files_votes", help="Use this SQLite file for cookie file records.")
    parser.add_argument("--email_files_votes", help="Use this SQLite file for email file records.")
    parser.add_argument("--exif_headers_votes", help="Use this SQLite file for EXIF records.")
    parser.add_argument("--fs_anno_dir", help="Directory containing the output of 'verify_fiwalk_versus_tsk_db.sh' output.")

    parser.add_argument("-a", "--anonymize", action="store_true", help="Do not print file system entries.")

    parser.add_argument("-p", "--precision_db", dest="precision_db", help="Database of precision for feature types.")

    args = parser.parse_args()

    if args.process_dir:
        dprint("Debug: Getting results databases from process_dir argument.")
        results_dir_list = geoproc_library.get_results_dirs(args.process_dir)
        if len(results_dir_list) != 1:
            sys.stderr.write("Error: --process_dir argument is not a singular GeoProc output directory.\n")
            sys.exit(1)
        #Do an extra test for TSK anno
        if not args.__dict__.get("fs_anno_dir"):
            if success.success(os.path.join(args.process_dir, "verify_fiwalk_versus_tsk_db.sh.status.log")):
                args.__dict__["fs_anno_dir"] = os.path.join(args.process_dir, "verify_fiwalk_versus_tsk_db.sh")

        for (args_param, analysis_dir, analysis_db) in TABLE_SCRIPT_DB:
            dprint("Debug: Testing %r." % ((args_param, analysis_dir, analysis_db),))
            if args.__dict__.get(args_param):
                dprint("Argument already present.")
                continue
            status_log_path = os.path.join(args.process_dir, analysis_dir + ".status.log")
            if not success.success(status_log_path):