def taper_off_orphans(conn):
    """Thin out stored files that are older than each plan's data retention.

    For every plan in the ``plan`` config section, the files carrying the
    plan's tag and uploaded at or before ``now - keep_data`` are walked from
    newest to oldest. The first file seen becomes an anchor and is kept; each
    following file uploaded less than ``keep_orphaned_screenshots`` seconds
    before the anchor is deleted through ``conn.fs``. The first file that
    falls outside that window is kept and clears the anchor, so the file
    after it starts the next window.

    A progress line is printed every 1024 deletions; the counter runs across
    all plans.

    Args:
        conn: Object exposing ``files`` (queried with ``find``) and ``fs``
            (used for ``delete``) -- presumably the GridFS files collection
            and its GridFS handle; confirm against the caller.
    """
    now = time.time()
    del_count = 0

    for idx, (name, plan) in enumerate(config.items("plan")):
        plan_info = planparser.parse_plan(name, plan, config, idx)

        # Everything that has no more data-points.
        # GridFS stamps uploadDate in UTC and PyMongo treats naive datetimes
        # as UTC, so the cutoff has to be built in UTC as well; a local-time
        # cutoff would be off by the host's UTC offset.
        taper_until_ts = datetime.datetime.utcfromtimestamp(
            now - plan_info["keep_data"])
        keep_window = datetime.timedelta(
            seconds=plan_info["keep_orphaned_screenshots"])
        candidates = {
            "uploadDate": {"$lte": taper_until_ts},
            "tag": plan_info["tag"],
        }

        # Single pre-formatted string: same text as the old multi-item print,
        # and valid under both Python 2 and Python 3.
        print("Taper off orphans  %s (has %s candidates)"
              % (name, conn.files.find(candidates).count()))

        delete_until_ts = None
        newest_first = conn.files.find(
            candidates, sort=[("uploadDate", pymongo.DESCENDING)])
        for entry in newest_first:
            if delete_until_ts is None:
                # Anchor file: kept, and it opens a new deletion window.
                delete_until_ts = entry["uploadDate"] - keep_window
            elif delete_until_ts < entry["uploadDate"]:
                # Still inside the anchor's window: redundant, remove it.
                conn.fs.delete(entry["_id"])
                del_count += 1
                if del_count % 1024 == 0:
                    print("deleted %s files." % (del_count,))
            else:
                # Outside the window: keep this file and let the next one
                # become the new anchor.
                # NOTE(review): this keeps two consecutive files at every
                # window boundary -- confirm that is intended.
                delete_until_ts = None
def global_cleanup(conn):
    """Delete expired results and expired stored files for every plan.

    For each plan in the ``plan`` config section:

    * documents in ``conn.results`` and ``conn.mh_results`` tagged with the
      plan name whose ``timestamp`` string is at or before
      ``now - keep_data`` are counted and then removed;
    * files in ``conn.files`` carrying the plan's tag and uploaded at or
      before ``now - keep_screenshots`` are deleted through ``conn.fs``;
    * the number of counted result documents is printed.

    Args:
        conn: Object exposing ``results``, ``mh_results`` and ``files``
            collections plus ``fs`` (used for ``delete``) -- presumably
            MongoDB collections and a GridFS handle; confirm against the
            caller.
    """
    now = time.time()

    for idx, (name, plan) in enumerate(config.items("plan")):
        plan_info = planparser.parse_plan(name, plan, config, idx)
        print("Cleaning %s ..." % (name,))

        # remove data
        # The result timestamps are compared as "%Y-%m-%d %H:%M:%S" strings.
        # NOTE(review): local time is kept here because that is what the
        # cutoff has always used -- confirm it matches how the timestamps
        # are written.
        keep_until = datetime.datetime.fromtimestamp(
            now - plan_info["keep_data"]).strftime("%Y-%m-%d %H:%M:%S")
        expired = {"tag": name, "timestamp": {"$lte": keep_until}}

        # Count first, then remove; the count is what gets reported.
        doc_count = conn.results.find(expired).count()
        doc_count += conn.mh_results.find(expired).count()
        conn.results.remove(expired)
        conn.mh_results.remove(expired)

        # remove screenshots
        # GridFS stamps uploadDate in UTC and PyMongo treats naive datetimes
        # as UTC, so this cutoff must be a UTC datetime; a local-time cutoff
        # would be off by the host's UTC offset.
        screenshots_until_ts = datetime.datetime.utcfromtimestamp(
            now - plan_info["keep_screenshots"])
        outdated = conn.files.find({
            "uploadDate": {"$lte": screenshots_until_ts},
            "tag": plan_info["tag"],
        })
        for entry in outdated:
            conn.fs.delete(entry["_id"])

        print("%s results deleted." % (doc_count,))
def global_cleanup(conn):
    """Delete expired results and expired stored files for every plan.

    For each plan in the ``plan`` config section:

    * documents in ``conn.results`` and ``conn.mh_results`` tagged with the
      plan name whose ``timestamp`` string is at or before
      ``now - keep_data`` are counted and then removed;
    * files in ``conn.files`` carrying the plan's tag and uploaded at or
      before ``now - keep_screenshots`` are deleted through ``conn.fs``;
    * the number of counted result documents is printed.

    Args:
        conn: Object exposing ``results``, ``mh_results`` and ``files``
            collections plus ``fs`` (used for ``delete``) -- presumably
            MongoDB collections and a GridFS handle; confirm against the
            caller.
    """
    now = time.time()

    for idx, (name, plan) in enumerate(config.items("plan")):
        plan_info = planparser.parse_plan(name, plan, config, idx)
        print("Cleaning %s ..." % (name,))

        # remove data
        # The result timestamps are compared as "%Y-%m-%d %H:%M:%S" strings.
        # NOTE(review): local time is kept here because that is what the
        # cutoff has always used -- confirm it matches how the timestamps
        # are written.
        keep_until = datetime.datetime.fromtimestamp(
            now - plan_info["keep_data"]).strftime("%Y-%m-%d %H:%M:%S")
        expired = {"tag": name, "timestamp": {"$lte": keep_until}}

        # Count first, then remove; the count is what gets reported.
        doc_count = conn.results.find(expired).count()
        doc_count += conn.mh_results.find(expired).count()
        conn.results.remove(expired)
        conn.mh_results.remove(expired)

        # remove screenshots
        # GridFS stamps uploadDate in UTC and PyMongo treats naive datetimes
        # as UTC, so this cutoff must be a UTC datetime; a local-time cutoff
        # would be off by the host's UTC offset.
        screenshots_until_ts = datetime.datetime.utcfromtimestamp(
            now - plan_info["keep_screenshots"])
        outdated = conn.files.find({
            "uploadDate": {"$lte": screenshots_until_ts},
            "tag": plan_info["tag"],
        })
        for entry in outdated:
            conn.fs.delete(entry["_id"])

        print("%s results deleted." % (doc_count,))
def taper_off_orphans(conn):
    """Thin out stored files that are older than each plan's data retention.

    For every plan in the ``plan`` config section, the files carrying the
    plan's tag and uploaded at or before ``now - keep_data`` are walked from
    newest to oldest. The first file seen becomes an anchor and is kept; each
    following file uploaded less than ``keep_orphaned_screenshots`` seconds
    before the anchor is deleted through ``conn.fs``. The first file that
    falls outside that window is kept and clears the anchor, so the file
    after it starts the next window.

    A progress line is printed every 1024 deletions; the counter runs across
    all plans.

    Args:
        conn: Object exposing ``files`` (queried with ``find``) and ``fs``
            (used for ``delete``) -- presumably the GridFS files collection
            and its GridFS handle; confirm against the caller.
    """
    now = time.time()
    del_count = 0

    for idx, (name, plan) in enumerate(config.items("plan")):
        plan_info = planparser.parse_plan(name, plan, config, idx)

        # Everything that has no more data-points.
        # GridFS stamps uploadDate in UTC and PyMongo treats naive datetimes
        # as UTC, so the cutoff has to be built in UTC as well; a local-time
        # cutoff would be off by the host's UTC offset.
        taper_until_ts = datetime.datetime.utcfromtimestamp(
            now - plan_info["keep_data"])
        keep_window = datetime.timedelta(
            seconds=plan_info["keep_orphaned_screenshots"])
        candidates = {
            "uploadDate": {"$lte": taper_until_ts},
            "tag": plan_info["tag"],
        }

        # Single pre-formatted string: same text as the old multi-item print,
        # and valid under both Python 2 and Python 3.
        print("Taper off orphans  %s (has %s candidates)"
              % (name, conn.files.find(candidates).count()))

        delete_until_ts = None
        newest_first = conn.files.find(
            candidates, sort=[("uploadDate", pymongo.DESCENDING)])
        for entry in newest_first:
            if delete_until_ts is None:
                # Anchor file: kept, and it opens a new deletion window.
                delete_until_ts = entry["uploadDate"] - keep_window
            elif delete_until_ts < entry["uploadDate"]:
                # Still inside the anchor's window: redundant, remove it.
                conn.fs.delete(entry["_id"])
                del_count += 1
                if del_count % 1024 == 0:
                    print("deleted %s files." % (del_count,))
            else:
                # Outside the window: keep this file and let the next one
                # become the new anchor.
                # NOTE(review): this keeps two consecutive files at every
                # window boundary -- confirm that is intended.
                delete_until_ts = None
work_queues = {} plans = config.items("plan") for name, plan in plans: work_queues[name] = [] thread.start_new_thread(worker_thread, (name, work_queues[name],)) while True: # one minute scheduler... ts = time.time() urls_to_handle = set() idx = 0 for name, plan in plans: plan_info = planparser.parse_plan(name, plan, config, idx) idx += 1 plan_info["tag"] = name work_queue = work_queues[name] if is_plan_scheduled(name, plan_info, ts): for url in plan_info["urls"]: if url not in urls_to_handle: urls_to_handle.add(url) already_enqueued = False for work in work_queue: if work["url"] == url: already_enqueued = True if not already_enqueued: work_queue.append({"url": url, "plan_info": plan_info}) if config.get("config", "keep_running") == "False":
work_queues[name] = [] thread.start_new_thread(worker_thread, ( name, work_queues[name], )) while True: # one minute scheduler... ts = time.time() urls_to_handle = set() idx = 0 for name, plan in plans: plan_info = planparser.parse_plan(name, plan, config, idx) idx += 1 plan_info["tag"] = name work_queue = work_queues[name] if is_plan_scheduled(name, plan_info, ts): for url in plan_info["urls"]: if url not in urls_to_handle: urls_to_handle.add(url) already_enqueued = False for work in work_queue: if work["url"] == url: already_enqueued = True if not already_enqueued: work_queue.append({ "url": url, "plan_info": plan_info