def main():
    """Print the summary report to standard output.

    Reads the module-level ``args`` namespace: ``args.title`` supplies the
    report title (``HTML_DEFAULT_TITLE`` is used when it is falsy) and
    ``args.results`` is handed to ``geoproc_library.get_results_dirs`` to
    find the directories to summarize.

    Output order: document head, table of contents, processing statistics,
    input listing, timestamp, document foot.
    """
    # Fall back to the default title when none was given.
    report_title = args.title or HTML_DEFAULT_TITLE

    # Start output
    print(HTML_DOC_HEAD % {"report_title": report_title})

    # Make list of directories to summarize, then build list of statuses.
    results_dirs = geoproc_library.get_results_dirs(args.results)
    statuses = aggregate_statuses(results_dirs)

    # Report sections, in the order they are printed.
    do_toc()
    do_processing_stats(statuses)
    do_input_listing(results_dirs)

    # Finish output
    do_timestamp()
    print(HTML_DOC_FOOT)
def main():
    """Aggregate the per-directory vote databases into one SQLite database.

    Reads the module-level ``args`` namespace:

    * ``args.output_db`` -- path of the SQLite database to create.
    * ``args.zap`` -- when true, an existing ``args.output_db`` is removed
      first; otherwise an existing file aborts the run.
    * ``args.ground_truth`` -- optional path of a SQLite database; when set,
      a cursor on it is passed to ``ingest_table``.
    * ``args.input_root`` -- passed to ``geoproc_library.get_results_dirs``
      to find the results directories to ingest.

    For each of the four vote tables (cookie files, email files, EXIF
    headers, IPv4s) the output database gets the base table plus a
    ``<table>_weighted`` copy with extra columns, and every results directory
    is then handed to ``ingest_table`` once per table. The single commit
    happens only after all directories have been ingested.

    All database connections opened here are closed before returning,
    whether or not an error occurred.

    Raises:
        Exception: if ``args.output_db`` already exists and ``args.zap`` is
            not set. Any error raised while ingesting a results directory is
            re-raised after a debug message naming that directory.
    """
    # Refuse to clobber an existing output database unless explicitly told to.
    if os.path.isfile(args.output_db):
        if args.zap:
            os.remove(args.output_db)
        else:
            raise Exception("Output database already exists; aborting.")

    # (create-table SQL, analysis script name, votes database file, table name)
    vote_tables = [
        (
            analyze_cookie_files.SQL_CREATE_COOKIE_FILES_VOTES,
            "analyze_cookie_files.sh",
            "cookie_files_votes.db",
            "cookie_files_votes",
        ),
        (
            analyze_email_files.SQL_CREATE_EMAIL_FILES_VOTES,
            "analyze_email_files.sh",
            "email_files_votes.db",
            "email_files_votes",
        ),
        (
            analyze_exif_headers.SQL_CREATE_EXIF_HEADERS_VOTES,
            "analyze_exif_headers.sh",
            "exif_headers_votes.db",
            "exif_headers_votes",
        ),
        (
            analyze_ipv4s.SQL_CREATE_IPV4S_VOTES,
            "analyze_ipv4s.sh",
            "ipv4s_votes.db",
            "ipv4s_votes",
        ),
    ]

    def add_columns(outcur, table_name):
        """Extend ``table_name`` and create its ``_weighted`` companion table.

        ``table_name`` is interpolated directly into the SQL text, so it must
        be one of the hard-coded table names above, never external input.
        """
        #Simple aggregate table: Just gets column for image_id
        outcur.execute("ALTER TABLE %s ADD COLUMN image_id TEXT;" % table_name)

        #Weighted aggregate table: Gets other columns to determine vote accuracy
        outcur.execute("CREATE TABLE %s_weighted AS SELECT * FROM %s;" % (table_name, table_name))
        outcur.execute("ALTER TABLE %s_weighted ADD COLUMN number_possible_locations NUMBER" % table_name)
        for bcol in ["country", "region", "city", "location"]:
            outcur.execute("ALTER TABLE %s_weighted ADD COLUMN correct_%s NUMBER;" % (table_name, bcol))

    #Connect to location database to get names (necessary for wildcard matching, like if we just found a city)
    config = geoproc_cfg.config
    lookupconn = mdb.Connect(
        host=config.get("mysql", "maxmind_server"),
        user=config.get("mysql", "maxmind_read_username"),
        password=geoproc_cfg.db_password("maxmind_read_password_file"),
        db=config.get("mysql", "maxmind_schema"),
        use_unicode=True
    )

    # Opened later; tracked here so the finally clause can close whatever
    # was actually opened.
    gtconn = None
    outconn = None
    try:
        lookupcur = lookupconn.cursor(cursor_class=geoproc_cfg.MySQLCursorDict)

        #Maybe connect to ground truth
        gtcur = None
        if args.ground_truth:
            gtconn = sqlite3.connect(args.ground_truth)
            gtconn.row_factory = sqlite3.Row
            #Don't lock database
            gtcur = gtconn.cursor()

        results_dir_list = geoproc_library.get_results_dirs(args.input_root)
        dprint("Aggregating %d directories." % len(results_dir_list))

        #Connect to output database
        outconn = sqlite3.connect(args.output_db)
        outconn.isolation_level = "EXCLUSIVE"
        outconn.row_factory = sqlite3.Row
        outcur = outconn.cursor()

        # Create every base table and its weighted companion before ingesting.
        for (create_sql, _script_name, _db_name, table_name) in vote_tables:
            outcur.execute(create_sql)
            add_columns(outcur, table_name)

        for results_dir in results_dir_list:
            try:
                for (_create_sql, script_name, db_name, table_name) in vote_tables:
                    ingest_table(outcur, lookupcur, gtcur, results_dir, script_name, db_name, table_name)
            except BaseException:
                # Name the offending directory, then let the error propagate
                # unchanged (this includes interrupts, as a bare except would).
                dprint("Debug: Error occurred on results_dir %r." % results_dir)
                raise

        outconn.commit()
    finally:
        # Release connections on success and on failure alike. Closing the
        # output connection without a commit leaves pending changes unsaved.
        for conn in (outconn, gtconn, lookupconn):
            if conn is not None:
                conn.close()
parser.add_argument("--process_dir", help="Use this GeoProc output directory to find all SQLite files, including votes and file system annotations. If another *_votes argument is passed, that argument will override.") parser.add_argument("--cookie_files_votes", help="Use this SQLite file for cookie file records.") parser.add_argument("--email_files_votes", help="Use this SQLite file for email file records.") parser.add_argument("--exif_headers_votes", help="Use this SQLite file for EXIF records.") parser.add_argument("--fs_anno_dir", help="Directory containing the output of 'verify_fiwalk_versus_tsk_db.sh' output.") parser.add_argument("-a", "--anonymize", action="store_true", help="Do not print file system entries.") parser.add_argument("-p", "--precision_db", dest="precision_db", help="Database of precision for feature types.") args = parser.parse_args() if args.process_dir: dprint("Debug: Getting results databases from process_dir argument.") results_dir_list = geoproc_library.get_results_dirs(args.process_dir) if len(results_dir_list) != 1: sys.stderr.write("Error: --process_dir argument is not a singular GeoProc output directory.\n") sys.exit(1) #Do an extra test for TSK anno if not args.__dict__.get("fs_anno_dir"): if success.success(os.path.join(args.process_dir, "verify_fiwalk_versus_tsk_db.sh.status.log")): args.__dict__["fs_anno_dir"] = os.path.join(args.process_dir, "verify_fiwalk_versus_tsk_db.sh") for (args_param, analysis_dir, analysis_db) in TABLE_SCRIPT_DB: dprint("Debug: Testing %r." % ((args_param, analysis_dir, analysis_db),)) if args.__dict__.get(args_param): dprint("Argument already present.") continue status_log_path = os.path.join(args.process_dir, analysis_dir + ".status.log") if not success.success(status_log_path):