def get_ff_cache(profile_dir, store_body=False):
    """Collect long-lived items from a Firefox profile's disk cache.

    The cache found under ``<profile_dir>/Cache`` is dumped into a fresh
    temporary directory by the external Perl script (``CACHE_PERL_SCRIPT``).
    Every ``*_metadata`` file of the dump is parsed, and only the items whose
    expiry delta is at least ``DELTA_MONTH`` are kept.

    Args:
        profile_dir: Path of the Firefox profile directory.
        store_body: When true, the raw cached body is stored under the
            ``"Body"`` key; otherwise ``"Body"`` is an empty string.

    Returns:
        A list of dicts, one per retained cache item. Each dict holds the
        parsed metadata (with "Etag", "Request String", "Expires" and
        "Cache-Control" defaulting to "") plus "Expiry-Delta", "Body" and
        "Hash". An empty list is returned when the profile has no "Cache"
        directory. If reading an item's files fails with IOError, the error
        is printed and the partially filled item is still appended.
    """
    cache_dir = os.path.join(profile_dir, "Cache")
    if not os.path.isdir(cache_dir):
        return []  # Firefox updated the cache dir structure since our study

    cache_map = os.path.join(cache_dir, "_CACHE_MAP_")
    cache_dump = os.path.join(BASE_TMP_DIR,
                              append_timestamp("cache") + rand_str())
    create_dir(cache_dump)

    metadata_suffix = "_metadata"
    db_items = ("Etag", "Request String", "Expires", "Cache-Control")
    cache_items = []
    try:
        subprocess.call([PERL_PATH, CACHE_PERL_SCRIPT, cache_map,
                         "--recover=" + cache_dump])
        for fname in glob(os.path.join(cache_dump, "*" + metadata_suffix)):
            item = {}
            try:
                with open(fname) as f:
                    metadata = f.read()
                item = parse_metadata(metadata)
                # Make sure every column written to the DB has a value.
                for db_item in db_items:
                    item.setdefault(db_item, "")

                # If a response includes both an Expires header and a max-age
                # directive, the max-age directive overrides the Expires
                # header
                # (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html)
                expiry_delta_sec = 0
                # NOTE(review): HTTP dates are GMT while datetime.now() is
                # local time - confirm what parse_date returns before
                # switching to utcnow().
                expiry = parse_date(item["Expires"])
                if expiry:
                    expiry_delta = expiry - datetime.now()
                    expiry_delta_sec = expiry_delta.total_seconds()

                # The key is "Cache-Control" (no trailing colon); the old
                # check for "Cache-Control:" never matched, so max-age was
                # silently ignored.
                if item["Cache-Control"]:
                    cache_directives = parse_cache_control_header(
                        item["Cache-Control"], cls=ResponseCacheControl)
                    if "max-age" in cache_directives:
                        # NOTE(review): the directive value may be a string
                        # or None depending on the parser; normalize it so
                        # the numeric comparison below is meaningful.
                        try:
                            expiry_delta_sec = int(
                                cache_directives["max-age"])
                        except (TypeError, ValueError):
                            # Malformed max-age: keep the Expires-based delta.
                            pass

                if expiry_delta_sec < DELTA_MONTH:
                    continue  # short-lived item, not recorded
                item["Expiry-Delta"] = expiry_delta_sec

                # The body lives next to the metadata file, under the same
                # name without the "_metadata" suffix. Read it as bytes so
                # binary content is not altered by text-mode translation.
                body_path = fname[:-len(metadata_suffix)]
                with open(body_path, "rb") as f:
                    data = f.read()
                item["Body"] = data if store_body else ""  # store as BLOB
                item["Hash"] = hash_text(base64.b64encode(data))
            except IOError as exc:
                print("Error processing cache: %s: %s" %
                      (exc, traceback.format_exc()))

            cache_items.append(item)
    finally:
        # Always remove the temporary dump, even if parsing raised.
        if os.path.isdir(cache_dump):
            shutil.rmtree(cache_dump)
    return cache_items
def get_ff_cache(profile_dir, store_body=False):
    """Collect long-lived items from a Firefox profile's disk cache.

    The cache found under ``<profile_dir>/Cache`` is dumped into a fresh
    temporary directory by the external Perl script (``CACHE_PERL_SCRIPT``).
    Every ``*_metadata`` file of the dump is parsed, and only the items whose
    expiry delta is at least ``DELTA_MONTH`` are kept.

    Args:
        profile_dir: Path of the Firefox profile directory.
        store_body: When true, the raw cached body is stored under the
            ``"Body"`` key; otherwise ``"Body"`` is an empty string.

    Returns:
        A list of dicts, one per retained cache item. Each dict holds the
        parsed metadata (with "Etag", "Request String", "Expires" and
        "Cache-Control" defaulting to "") plus "Expiry-Delta", "Body" and
        "Hash". An empty list is returned when the profile has no "Cache"
        directory. If reading an item's files fails with IOError, the error
        is printed and the partially filled item is still appended.
    """
    cache_dir = os.path.join(profile_dir, "Cache")
    if not os.path.isdir(cache_dir):
        return []  # Firefox updated the cache dir structure since our study

    cache_map = os.path.join(cache_dir, "_CACHE_MAP_")
    cache_dump = os.path.join(BASE_TMP_DIR,
                              append_timestamp("cache") + rand_str())
    create_dir(cache_dump)

    metadata_suffix = "_metadata"
    db_items = ("Etag", "Request String", "Expires", "Cache-Control")
    cache_items = []
    try:
        subprocess.call([PERL_PATH, CACHE_PERL_SCRIPT, cache_map,
                         "--recover=" + cache_dump])
        for fname in glob(os.path.join(cache_dump, "*" + metadata_suffix)):
            item = {}
            try:
                with open(fname) as f:
                    metadata = f.read()
                item = parse_metadata(metadata)
                # Make sure every column written to the DB has a value.
                for db_item in db_items:
                    item.setdefault(db_item, "")

                # If a response includes both an Expires header and a max-age
                # directive, the max-age directive overrides the Expires
                # header
                # (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html)
                expiry_delta_sec = 0
                # NOTE(review): HTTP dates are GMT while datetime.now() is
                # local time - confirm what parse_date returns before
                # switching to utcnow().
                expiry = parse_date(item["Expires"])
                if expiry:
                    expiry_delta = expiry - datetime.now()
                    expiry_delta_sec = expiry_delta.total_seconds()

                # The key is "Cache-Control" (no trailing colon); the old
                # check for "Cache-Control:" never matched, so max-age was
                # silently ignored.
                if item["Cache-Control"]:
                    cache_directives = parse_cache_control_header(
                        item["Cache-Control"], cls=ResponseCacheControl)
                    if "max-age" in cache_directives:
                        # NOTE(review): the directive value may be a string
                        # or None depending on the parser; normalize it so
                        # the numeric comparison below is meaningful.
                        try:
                            expiry_delta_sec = int(
                                cache_directives["max-age"])
                        except (TypeError, ValueError):
                            # Malformed max-age: keep the Expires-based delta.
                            pass

                if expiry_delta_sec < DELTA_MONTH:
                    continue  # short-lived item, not recorded
                item["Expiry-Delta"] = expiry_delta_sec

                # The body lives next to the metadata file, under the same
                # name without the "_metadata" suffix. Read it as bytes so
                # binary content is not altered by text-mode translation.
                body_path = fname[:-len(metadata_suffix)]
                with open(body_path, "rb") as f:
                    data = f.read()
                item["Body"] = data if store_body else ""  # store as BLOB
                item["Hash"] = hash_text(base64.b64encode(data))
            except IOError as exc:
                print("Error processing cache: %s: %s" %
                      (exc, traceback.format_exc()))

            cache_items.append(item)
    finally:
        # Always remove the temporary dump, even if parsing raised.
        if os.path.isdir(cache_dump):
            shutil.rmtree(cache_dump)
    return cache_items
def insert_canvas_event(cursor, visit_info, be):
    """Insert one row into the ``canvas`` table and return its row id.

    When ``be.event_type`` is one of ``cm.CANVAS_ALL_EVENTS``, the event's
    log text is de-duplicated through the ``metadata`` table: an existing
    row with the same hash and event type is reused, otherwise a new one is
    inserted. For any other event type the metadata id is written as 0.

    Args:
        cursor: DB cursor used for all statements.
        visit_info: Object providing ``visit_id``.
        be: Event providing ``event_type``, ``log_text``, ``url``,
            ``js_file`` and ``js_line``.

    Returns:
        ``cursor.lastrowid`` after the insert into ``canvas``.
    """
    meta_id = 0
    if be.event_type in cm.CANVAS_ALL_EVENTS:
        metadata_hash = hash_text(be.log_text)
        lookup_sql = ("SELECT id FROM metadata WHERE hash=:Hash and "
                      "event_type=:event_type")
        lookup_args = {"Hash": metadata_hash, "event_type": be.event_type}
        cursor.execute(lookup_sql, lookup_args)
        existing_row = cursor.fetchone()
        if existing_row is not None:
            # fetchone returns a tuple; the id is its only column
            meta_id = existing_row[0]
        else:
            new_metadata = (None, be.event_type, be.log_text, metadata_hash)
            cursor.execute("INSERT INTO metadata VALUES (?, ?, ?, ?)",
                           new_metadata)
            meta_id = cursor.lastrowid

    canvas_row = (None, visit_info.visit_id, be.url, be.js_file, be.js_line,
                  be.event_type, 0, meta_id)
    cursor.execute("INSERT INTO canvas VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                   canvas_row)
    return cursor.lastrowid
def insert_canvas_event(cursor, visit_info, be):
    """Insert one row into the ``canvas`` table and return its row id.

    When ``be.event_type`` is one of ``cm.CANVAS_ALL_EVENTS``, the event's
    log text is de-duplicated through the ``metadata`` table: an existing
    row with the same hash and event type is reused, otherwise a new one is
    inserted. For any other event type the metadata id is written as 0.

    Args:
        cursor: DB cursor used for all statements.
        visit_info: Object providing ``visit_id``.
        be: Event providing ``event_type``, ``log_text``, ``url``,
            ``js_file`` and ``js_line``.

    Returns:
        ``cursor.lastrowid`` after the insert into ``canvas``.
    """
    meta_id = 0
    if be.event_type in cm.CANVAS_ALL_EVENTS:
        metadata_hash = hash_text(be.log_text)
        lookup_sql = ("SELECT id FROM metadata WHERE hash=:Hash and "
                      "event_type=:event_type")
        lookup_args = {"Hash": metadata_hash, "event_type": be.event_type}
        cursor.execute(lookup_sql, lookup_args)
        existing_row = cursor.fetchone()
        if existing_row is not None:
            # fetchone returns a tuple; the id is its only column
            meta_id = existing_row[0]
        else:
            new_metadata = (None, be.event_type, be.log_text, metadata_hash)
            cursor.execute("INSERT INTO metadata VALUES (?, ?, ?, ?)",
                           new_metadata)
            meta_id = cursor.lastrowid

    canvas_row = (None, visit_info.visit_id, be.url, be.js_file, be.js_line,
                  be.event_type, 0, meta_id)
    cursor.execute("INSERT INTO canvas VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                   canvas_row)
    return cursor.lastrowid